| Functions.1 | Functions.2 | Functions.3 |
|---|---|---|
| black_scholes | check_acc | desc_df |
| dump_definition | get_portfolio | gg_qq_plot |
| greeks | normalize | Qdate |
| quickplot | rebase | show_df |
Applied Statistics for Finance
Full dependencies list, click to expand list
Show code
when_rendering(dump_definition(file))$black_scholes
[1] "function (S = 100, K = 100, Time = 0.5, r = 0.05, sigma = 0.2, q = 0, type = \"call\", rounding = Inf) "
[2] "{"
[3] " d1 <- (log(S/K) + (r - q + sigma^2/2) * Time)/(sigma * sqrt(Time))"
[4] " d2 <- d1 - sigma * sqrt(Time)"
[5] " price <- switch(type, call = S * exp(-q * Time) * pnorm(d1) - K * exp(-r * Time) * pnorm(d2), put = K * exp(-r * Time) * pnorm(-d2) - S * exp(-q * Time) * pnorm(-d1))"
[6] " price <- ifelse(test = is.null(price), yes = \"ERROR\", no = round(price, rounding))"
[7] " return(price)"
[8] "}"
$check_acc
[1] "function (data1, data2, n = 10, title = NULL, visual = c(TRUE, FALSE, \"both\")) "
[2] "{"
[3] " stopifnot(length(data1) == length(data2), is.numeric(data1), is.numeric(data2))"
[4] " accuracy <- data.frame()"
[5] " for (i in 0:n) {"
[6] " new <- mean(round(data1, i) == round(data2, i), na.rm = TRUE)"
[7] " accuracy <- rbind(accuracy, new)"
[8] " }"
[9] " colnames(accuracy) <- \"values\""
[10] " cor_val <- cor(data1, data2, use = \"complete.obs\")"
[11] " mae_val <- mean(abs(data1 - data2), na.rm = TRUE)"
[12] " mse_val <- mean((data1 - data2)^2, na.rm = TRUE)"
[13] " diff_data <- data.frame(data1 = data1, data2 = data2) %>% head(5000) %>% mutate(index = index(.), diff = data1 - data2)"
[14] " diff_range <- range(diff_data$diff, na.rm = TRUE)"
[15] " diff_span <- diff_range[2] - diff_range[1]"
[16] " y_limits <- if (diff_span < 1e-06) {"
[17] " c(-1e-06, 1e-06)"
[18] " }"
[19] " else {"
[20] " NULL"
[21] " }"
[22] " diff_plot <- ggplot(diff_data, aes(x = index, y = diff)) + geom_point(color = \"darkred\", size = 0.7) + labs(subtitle = \"Difference\", x = NULL, y = NULL) + coord_cartesian(ylim = y_limits)"
[23] " accplot <- ggplot(accuracy, aes(x = 0:n, y = values)) + geom_line(linewidth = 1, color = \"dodgerblue2\") + geom_point(size = 2, color = \"dodgerblue3\") + scale_x_continuous(breaks = seq(0, n, by = 1)) + ylim(0, 1) + scale_y_continuous(labels = label_percent(), limits = c(0, 1)) + labs(subtitle = \"Percentage of accuracy\", y = NULL, x = \"Rounding decimals\")"
[24] " plot <- data.frame(data1 = data1, data2 = data2) %>% head(5000) %>% ggplot(aes(x = index(data1))) + geom_point(aes(y = data1, color = \"Data1\"), size = 2) + geom_point(aes(y = data2, color = \"Data2\"), size = 2) + scale_color_manual(values = c(Data1 = \"purple3\", Data2 = \"mediumseagreen\"), name = NULL) + labs(title = \"Visual inspection\", y = NULL, x = NULL) + theme(legend.position = \"bottom\")"
[25] " sum_df <- summary(data.frame(data1 = data1, data2 = data2))"
[26] " full_df <- data.frame(accuracy[accuracy != 0] * 100) %>% round(2) %>% data.frame() %>% set_colnames(\"Values !=0 %\")"
[27] " if (visual[1] == TRUE) {"
[28] " return(marrangeGrob(list(plot, diff_plot, accplot), layout_matrix = matrix(c(3, 3, 2, 2, 1, 1, 1, 1), nrow = 4, ncol = 2), top = title))"
[29] " }"
[30] " else if (visual[1] == FALSE) {"
[31] " return(list(distance = data.frame(COR = cor_val, MAE = mae_val, MSE = mse_val), df = full_df, sum_df = sum_df))"
[32] " }"
[33] " else if (visual[1] == \"both\") {"
[34] " return(list(plots = marrangeGrob(list(plot, diff_plot, accplot), layout_matrix = matrix(c(3, 3, 2, 2, 1, 1, 1, 1), nrow = 4, ncol = 2), top = title), df = full_df, sum_df = sum_df))"
[35] " }"
[36] "}"
$desc_df
[1] "function (data, quantiles = c(0.01, 0.25, 0.75, 0.99), digits = 4) "
[2] "{"
[3] " summary_stats <- function(x) {"
[4] " n <- sum(!is.na(x))"
[5] " mean <- mean(x, na.rm = TRUE)"
[6] " sd <- sd(x, na.rm = TRUE)"
[7] " median <- median(x, na.rm = TRUE)"
[8] " trimmed <- mean(x, trim = 0.1, na.rm = TRUE)"
[9] " min <- min(x, na.rm = TRUE)"
[10] " max <- max(x, na.rm = TRUE)"
[11] " range <- max - min"
[12] " skew <- sum((x - mean)^3, na.rm = TRUE)/(n * sd^3)"
[13] " kurtosis <- sum((x - mean)^4, na.rm = TRUE)/(n * sd^4) - 3"
[14] " se <- sd/sqrt(n)"
[15] " percent_missing <- sum(is.na(x))/length(x) * 100"
[16] " quantiles_values <- quantile(x, probs = quantiles, na.rm = TRUE)"
[17] " c(n = n, mean = mean, sd = sd, median = median, trimmed = trimmed, min = min, max = max, range = range, skew = skew, kurtosis = kurtosis, se = se, `%NA` = percent_missing, Q = quantiles_values[1], Q = quantiles_values[2], Q = quantiles_values[3], Q = quantiles_values[4])"
[18] " }"
[19] " stats <- sapply(data, function(col) {"
[20] " if (is.numeric(col)) "
[21] " summary_stats(col)"
[22] " else rep(NA, length(summary_stats(0)))"
[23] " })"
[24] " as.data.frame(round(t(stats), digits = digits))"
[25] "}"
$dump_definition
[1] "function (file) "
[2] "{"
[3] " mainEnv <- new.env()"
[4] " source_path <- \"C:/Users/pietr/OneDrive/Desktop/formula.main.R\""
[5] " source(source_path, local = mainEnv)"
[6] " custom_functions <- ls(envir = mainEnv)"
[7] " custom_functions <- custom_functions[sapply(custom_functions, function(x) is.function(get(x, envir = mainEnv)))]"
[8] " script_content <- readLines(file)"
[9] " script_words <- unlist(strsplit(script_content, \"\\\\W+\"))"
[10] " used_functions <- intersect(custom_functions, script_words)"
[11] " used_functions <- setdiff(used_functions, c(\"functions_loaded\", \"packages_loaded\", \"required_functions\", \"required_packages\", \"when_rendering\", \"isrendering\", \"if_rendering\"))"
[12] " if (length(used_functions) == 0) {"
[13] " return(cat(\"There are no functions coming from the main file\", \"\\n\"))"
[14] " }"
[15] " function_definitions <- list()"
[16] " for (func_name in used_functions) {"
[17] " if (exists(func_name, envir = mainEnv)) {"
[18] " func_obj <- get(func_name, envir = mainEnv)"
[19] " if (is.function(func_obj)) {"
[20] " func_text <- deparse(func_obj, width.cutoff = 500)"
[21] " function_definitions[[func_name]] <- func_text"
[22] " }"
[23] " }"
[24] " }"
[25] " return(function_definitions)"
[26] "}"
$get_portfolio
[1] "function (tickers_portfolio, start_date = Sys.Date() - 6540, end_date = Sys.Date(), fun = Ad, clean_names = F, na.locf = T, include_date = F) "
[2] "{"
[3] " data1 = lapply(tickers_portfolio, FUN = function(x) {"
[4] " fun(getSymbols(x, from = start_date, to = end_date, auto.assign = FALSE))"
[5] " })"
[6] " portfolio = do.call(merge, data1)"
[7] " if (na.locf == TRUE) {"
[8] " portfolio <- na.locf(portfolio)"
[9] " }"
[10] " if (clean_names) {"
[11] " colnames(portfolio) <- before_dot(portfolio)"
[12] " }"
[13] " if (include_date) {"
[14] " return(data.frame(DATE = index(portfolio), coredata(portfolio)))"
[15] " }"
[16] " return(portfolio)"
[17] "}"
$gg_qq_plot
[1] "function (data, title = NA) "
[2] "{"
[3] " realtitle <- ifelse(is.na(title), yes = \"QQ Plot\", no = paste(\"QQ Plot of\", title))"
[4] " df <- data.frame(sample = sort(data), theoretical = qnorm(ppoints(length(data))))"
[5] " fit <- lm(sample ~ theoretical, data = df)"
[6] " ggplot(df, aes(x = theoretical, y = sample)) + geom_hline(yintercept = 0, color = \"black\", linetype = \"dotted\", linewidth = 0.2) + geom_vline(xintercept = 0, color = \"black\", linetype = \"dotted\", linewidth = 0.2) + geom_point(color = \"blue\", size = 2) + geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], color = \"red\", linewidth = 1) + labs(title = realtitle, x = \"Theoretical Quantiles\", y = \"Sample Quantiles\") + theme_minimal()"
[7] "}"
$greeks
[1] "function (S = 100, K = 100, r = 0.05, sigma = 0.2, Time = 1, q = 0, type = \"call\", rounding = Inf) "
[2] "{"
[3] " d1 <- (log(S/K) + (r - q + sigma^2/2) * Time)/(sigma * sqrt(Time))"
[4] " d2 <- d1 - sigma * sqrt(Time)"
[5] " delta <- ifelse(type == \"call\", exp(-q * Time) * pnorm(d1), exp(-q * Time) * (pnorm(d1) - 1))"
[6] " gamma <- exp(-q * Time) * dnorm(d1)/(S * sigma * sqrt(Time))"
[7] " theta <- ifelse(type == \"call\", (-S * dnorm(d1) * sigma * exp(-q * Time)/(2 * sqrt(Time)) - r * K * exp(-r * Time) * pnorm(d2)), (-S * dnorm(d1) * sigma * exp(-q * Time)/(2 * sqrt(Time)) + r * K * exp(-r * Time) * pnorm(-d2)))"
[8] " vega <- S * exp(-q * Time) * dnorm(d1) * sqrt(Time)"
[9] " rho <- ifelse(type == \"call\", K * Time * exp(-r * Time) * pnorm(d2), -K * Time * exp(-r * Time) * pnorm(-d2))"
[10] " combined <- data.frame(delta = delta, gamma = gamma, theta = theta, vega = vega, rho = rho)"
[11] " return(round(combined, rounding))"
[12] "}"
$normalize
[1] "function (x, peak = 100) " "{"
[3] " MAX <- max(x, na.rm = TRUE)" " df <- (x/MAX) * peak"
[5] " return(df)" "}"
$Qdate
[1] "function (day, month, year) "
[2] "{"
[3] " date_string <- paste(year, month, day, sep = \"-\")"
[4] " date <- as.Date(date_string, format = \"%Y-%m-%d\")"
[5] " return(date)"
[6] "}"
$quickplot
[1] "function (data, title = NULL, plot_engine = c(\"ggplot\", \"plotly\"), xlab = \"Date\", ylab = \"Value\", show_legend = TRUE, subtitle = NULL, caption = NULL, linewidth = 0.4, legend_name = \"Variable\", legend_position = c(\"right\", \"left\", \"bottom\", \"top\"), alpha = 1, type = geom_line, facet_wrap = FALSE, x_size = 1, x_start = 1, x_step = 1, show_x = TRUE) "
[2] "{"
[3] " plot_data <- data.frame(Date = index(data), data)"
[4] " custom_palette <- rep(c(\"firebrick\", \"darkblue\", \"#006400\", \"gray30\", \"#457575\", \"#6100a8\", \"orange2\", \"brown\", \"#483D8B\", \"#556B2F\", \"#8B008B\", \"#5F9EA0\", \"#6B8E23\", \"#9932CC\"), 1000)"
[5] " my_data_long <- pivot_longer(data = plot_data, cols = -Date, names_to = \"Variable\", values_to = \"Value\")"
[6] " if (class(data)[1] != \"xts\") {"
[7] " my_data_long$Date <- my_data_long$Date/x_size"
[8] " }"
[9] " if (x_start != 1) {"
[10] " my_data_long$Date <- my_data_long$Date + x_start"
[11] " }"
[12] " plot <- ggplot(my_data_long, aes(x = Date, y = Value, color = Variable)) + type(linewidth = linewidth, alpha = alpha) + labs(title = title, subtitle = subtitle, caption = caption, x = xlab, y = ylab) + scale_color_manual(name = legend_name, values = custom_palette) + theme(legend.position = legend_position[1], plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))"
[13] " if (!show_legend) {"
[14] " plot <- plot + theme(legend.position = \"none\")"
[15] " }"
[16] " if (!show_x) {"
[17] " plot <- plot + theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())"
[18] " }"
[19] " if (facet_wrap) {"
[20] " plot <- plot + facet_wrap(~Variable)"
[21] " return(plot)"
[22] " }"
[23] " final_plot <- switch(plot_engine[1], ggplot = plot, plotly = ggplotly(plot) %>% layout(xaxis = list(rangeslider = list(visible = TRUE, thickness = 0.08)), yaxis = list(fixedrange = FALSE), dragmode = \"zoom\"))"
[24] " return(final_plot)"
[25] "}"
$rebase
[1] "function (data, start_value = 100, na.fill = 0, round = Inf) "
[2] "{"
[3] " data %>% ROC() %>% na.fill(fill = 0) %>% apply(., 2, delog) %>% round(digits = round) %>% as.data.frame()"
[4] "}"
$show_df
[1] "function (prices, n = 5, rounding = Inf, name_first_col = \"DATE\") "
[2] "{"
[3] " price_date <- cbind(index(prices), smart_round(data.frame(prices), rounding))"
[4] " colnames(price_date) <- (c(name_first_col, colnames(prices)))"
[5] " rownames(price_date) <- (1:length(index(prices)))"
[6] " first_rows <- head(price_date, n)"
[7] " last_rows <- tail(price_date, n)"
[8] " separator <- matrix(NA, nrow = 1, ncol = ncol(price_date))"
[9] " colnames(separator) <- (c(name_first_col, colnames(prices)))"
[10] " summary_table <- bind_rows(first_rows, as.data.frame(separator), last_rows)"
[11] " return(summary_table)"
[12] "}"
Packages required to run this file
| Required.Packages.1 | Required.Packages.2 | Required.Packages.3 |
|---|---|---|
| broom | dplyr | DT |
| e1071 | EnvStats | fBasics |
| foreach | forecast | GeneralizedHyperbolic |
| ggcorrplot | ggplot2 | ghyp |
| gt | gtExtras | hrbrthemes |
| knitr | library(“stats4”) | library(“viridis”) |
| lubridate | magrittr | moments |
| PerformanceAnalytics | plotly | purrr |
| quantmod | reshape2 | reticulate |
| rugarch | Runuran | sde |
| Sim | Sim.DiffProc | stats4 |
| tidyr | timeDate | timeSeries |
| tseries | TTR | VarianceGamma |
| viridis | viridisLite | xts |
| yuima | zoo |
Functions defined in this document
| Required.Functions.1 | Required.Functions.2 | Required.Functions.3 |
|---|---|---|
| bprint | aacplot | BM.1 |
| BM.2 | BM.vec | GBM.vec |
| GBM.vec_inv | log.lik | est.log.lik |
| NLL | f | mean_correcting_martingale |
| call.price | put.price | MCPrice |
| h | FFTcall.price | phi.tilde |
| psi | phiBS | VG |
| VG.vec | plot_density_comparison | VGexp |
| phiVG | MX | LSM |
| fraoneshow | bs_price | implied_vol |
| objective |
1 Random number generators and Monte Carlo
Note on forward looking algorithms
Much of this subject is based on a generative, forward-looking function of pseudo-random values, \(x_{n+1} = f(x_n)\): given \(x_{n+1}\) it is not possible to recover \(x_n\), and given the generated numbers it is often not possible to invert the results and recover the function \(f(x)\).
1.1 Random number generators
Computers follow deterministic patterns, so there is no true randomness; that's why we need to create it, starting from the uniform distribution, which produces numbers between 0 and 1, equally distributed. Setting the seed allows replicability: if you set the seed immediately before running runif, you get the same numbers.
All sequences have the property that they are infinitely repeating the thing that changes is the seed, the sequence of numbers is from 0 to 1 and starts from a different point. the set.seed function allows to change the starting position of the seed, starting from the value in position 123 and stepping from there in the sequence
it’s important to set the seed to allow for reproducibility of the work, when working with someone else or in different times
Show code
hist(runif(1000), nclass = 1, xlim = c(0, 3))Show code
quickplot(runif(1000), title = "Uniform distributions", show_legend = F)Show code
replicate(10, runif(20)) %>%
quickplot(title = "Simulation of different Uniform distributions", linewidth = 0.6,
show_legend = F)Show code
set.seed(123)
runif(10) [1] 0.2875775 0.7883051 0.4089769 0.8830174 0.9404673 0.0455565 0.5281055
[8] 0.8924190 0.5514350 0.4566147
Show code
set.seed(123)
runif(10) [1] 0.2875775 0.7883051 0.4089769 0.8830174 0.9404673 0.0455565 0.5281055
[8] 0.8924190 0.5514350 0.4566147
For almost all applications of statistics a uniform distribution isn't enough, so you need a way to generate from other distributions. There are two methods to do this: the inverse transformation and the acceptance-rejection method.
For discrete random variables, the inverse \(f^{-1}(x)\) is always easy to obtain like in the case of the exponential distribution (which is a continuous distribution but it works the same way). Instead of simulating from an exponential I can take the uniform distribution and isolate the \(x\) as a function of \(Unif(0,1)\), and from this inverse formula i can produce numbers out of it
\[ F(x) = 1 − e^{−λx} \]
\[ F(x)^{-1} \ \ \rightarrow \ \ X= −\frac1λln(U) \]
Alternatively, you could solve numerically and try to find parameters that reach the same results as the function; however, this needs a lot of attempts to converge, so it's generally not recommended.
Show code
# inverse transform method
X <- runif(1000)
lambda <- 5
Y <- exp(-lambda * X)
Y_inv <- (-1/lambda) * log(Y)
check_acc(X, Y_inv, n = 20)Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
There are some cases that are more complex and cannot be inverted, even with the iterative process — for example the normal distribution. You can still solve it numerically (still not recommended).
This brings us to the so-called acceptance-rejection method.
Let \(x\) be a random variable with density \(f\) for which no RNG exists. Assume you know how to simulate from Y with density g and there exists a constant \(c > 0\) such that
\[ \sup_x \frac{f(x)}{c\,g(x)} \le 1 \]
Get the distribution \(f(x)\) (target) and another distribution \(g(x)\) (envelope) The 2 distribution need to be similar enough so that the distribution stays in the envelope but not so big that everything is rejected for this example lets use a normal PDF and a uniform distribution PDF
You can also work on \(c\) which as it gets smaller there is less wasted area but also the risk of cutting off the area of the acutal function, so it needs to be somewhere in the middle, however if the distribution is a poor fit there is nothing that the \(c\) can do on its own
Show code
dn <- dnorm(-400:400/100)
c <- 1
data.frame(density = dn, x = (1:801)/801, unif = 1) %>%
ggplot(aes(x = x)) + geom_line(aes(y = density)) + geom_ribbon(aes(ymin = 0,
ymax = 1), alpha = 0.3, color = "black") + xlim(c(0, 1.5)) + labs(title = "Acceptance-rejection method",
subtitle = paste0("c=", c), x = paste0("Result of the function ", round(max(dn/(c *
1)), 4)))Show code
max(dn/(c * 1))[1] 0.3989423
In this case we can obviously see that the uniform distribution is not a good fit and we can't use it to approximate. Therefore we need a different distribution; the Student's t seems a better fit, after looking for a better \(c\) — one big enough that \(cg(x)\) can contain \(f(x)\), but only barely: the bigger \(cg(x)\), the higher the risk of falling into the rejection area and therefore of simulating values that cannot be used. But if \(c\) is too small, the target spills out of the envelope, meaning those values will never be simulated, and in that case it is a big mistake to use this method (graphically, it's where the line rises above the shaded area).
Show code
c <- 2
dt_stud <- dt(x = -400:400/100, df = 1)
data.frame(density = dn, x = (1:801)/801, tnorm = dt_stud) %>%
ggplot(aes(x = x)) + geom_line(aes(y = density)) + geom_ribbon(aes(ymin = 0,
ymax = tnorm * c), alpha = 0.3, color = "black") + labs(title = "Acceptance-rejection method",
subtitle = paste0("c=", c), x = paste0("Result of the function ", round(max(dn/(c *
dt_stud)), 4)))Show code
max(dn/(c * dt_stud))[1] 0.7601735
Show code
c <- seq(0.9, 3, length.out = 12)
aacplot <- function(c) {
    # One panel of the acceptance-rejection grid for a given constant c:
    # target density f(x) as a line, scaled envelope c * g(x) as a shaded
    # ribbon, and the "spill" region (where the target exceeds the envelope,
    # i.e. values that can never be simulated) highlighted in red.
    # Relies on the chunk-level globals `dn` (target density values) and
    # `dt_stud` (t-density envelope values).
    df <- data.frame(density = dn, x = (1:801)/801, tnorm = dt_stud)
    # Fix: the spill region must be measured against the SCALED envelope
    # c * g(x) (the original compared against the unscaled tnorm and drew a
    # zero-height ribbon from density to density, so nothing was visible).
    ggplot(df, aes(x = x)) + geom_line(aes(y = density)) + geom_ribbon(aes(ymin = 0,
        ymax = tnorm * c), alpha = 0.3, color = "black") + geom_ribbon(aes(ymin = pmin(tnorm * c, density),
        ymax = ifelse(density > tnorm * c, density, NA)), fill = "red", alpha = 0.5) +
        ggtitle(label = NULL, subtitle = paste(paste0("c=", round(c, 2)), "\nresult",
            round(max(dn/(c * dt_stud)), 4))) + theme_void()
}
suppressWarnings(grid.arrange(grobs = lapply(c, aacplot), nrow = 3))in this very simple case a value of 1.66 is the best option
For any complex distribution some programmer went and developed the inverse to then apply from the uniform distribution
| Random Variable | Generator |
|---|---|
| Bin(n, p) | rbinom(r, n, p) |
| Bin Neg(n, p) | rnbinom(r, n, p) |
| Geom(p) | rgeom(r, p) |
| Iperg(N,K, n) | rhyper(r,K, N − K, n) |
| Poisson(λ) | rpois(r, λ) |
| U(a, b) | runif(r, min = a, max = b) |
| Exp(a, b) | rexp(r,rate = λ) |
| Normal(µ, σ) | rnorm(r, mean = µ,sd = σ) |
| \(t_\nu\) | rt(r, df = ν) |
| \(\chi_\nu^2\) | rchisq(r, df = ν) |
| F(\(ν1\), \(ν2\)) | rf(r, df1 = ν1, df1 = ν2) |
| Gamma(α, β) | rgamma(r,shape = α,rate = 1/β,scale = β) |
| Beta(α, β) | rbeta(r,shape1 = α,shape2 = β) |
Common question: When to use one or the other?
You use the inverse transformation method when you can isolate \(x\), from the function, typically with discrete distributions or the exponential distribution for continuous cases. If isolating \(x\) is not possible, you use the acceptance-rejection method, being careful to use compatible distributions and working on \(c\) to better the fit without cutting part of the distribution off.
1.2 The Monte Carlo Method
The Monte Carlo Method is a numerical method based on statistical arguments which can be used to generate draws from a probability distribution and to evaluate integrals. In particular, the expected value of a random variable X with density \(f(·)\) is an integral of this form
\[ \mathbb E (X) = \int x·f(x)dx \]
we use the Monte Carlo Method to evaluate the expected payoff of some derivative in order to price it. This is what most of this course focuses on
Thanks to the Law of Large Numbers, together with the Central Limit Theorem, a correct estimate of the expected value is the sample average:
\[ \mathbb E\Big(g(Y)\Big) \cong \frac1n \sum_{i=1}^n g(y_i) = \bar g_n \]
Additionally, we can derive the distribution of this estimator thanks to the CLT and the LLN:
\[ \frac1n \sum_{i=1}^n g(y_i) \sim N\bigg(\mathbb E\Big(g(Y)\Big), \frac1nVAR\Big(g(Y)\Big)\bigg) \]
and we can use this to actually price the option Get the mean and discount it to \(t_0\) Stocks that don’t execute option still count and lower values
\[ Price = e^{-rT}\cdot \mathbb E\big[\max(0, S_T-K)\big] \]
Show code
MNC_Sim <- cbind(Sim.DiffProc::GBM(N = 100, M = 100, x0 = 50, theta = 0.05, sigma = 0.2),
Sim.DiffProc::GBM(N = 100, M = 1, x0 = 50, theta = 0.5, sigma = 0.2) * 2 - 50)
K <- 60
MNC_Sim %>%
quickplot("Montecarlo simulation of geometric brownian motion", show_legend = F,
x_size = 100) + geom_hline(aes(yintercept = K), linewidth = 1.2, color = "red")Show code
mean_option <- mean(ifelse(last(MNC_Sim) > K, yes = last(MNC_Sim), 0))
cat("Mean of last datapoints", mean_option, "\n")Mean of last datapoints 14.2655
Show code
cat("Price of option according to MNC", exp(-1 * 1) * mean_option)Price of option according to MNC 5.247984
Common question: Why use variance reduction techniques?
In some cases, Monte Carlo intervals are not very informative if the variance of Y = g(X) is too large, as variability affects stability of the Monte Carlo method. This could be the case with some extreme outliers in the process (here done manually). In this case the process requires variance reduction techniques. They are discussed later when we can actually simulate GBM
The price of a derivative is given by the general formula
\[ P_t = e^{-r (T-t)}\,\mathbb E\big[f(S_T)\big] \]
where \(f(·)\) is some payoff function, e.g. \(f (ST ) = max(S_T − K, 0)\) for a European call option with strike price K, maturity T. We need to estimate via Monte Carlo the expected value and, to this end, we need to be able to simulate the values of ST . So we need to know how to simulate stochastic processes.
\[ S_t=x\cdot e^{\big(r-\frac{σ^2}{2}\big)\cdot t+σ\ W_t} \] with \(t>0\) and \(x=S_0\)
Show code
replicate(15, cumsum(rnorm(100))) %>%
quickplot(title = "Montecarlo simulation of rownian motion", subtitle = "No variance reduction",
show_legend = F, x_size = 100)2 Simulation of Stochastic Processes
A process that not only generates random numbers but also includes some drift component like the average growth coupled with a unexpected component.
A stochastic process is a collection of random variables indexed by time, typically denoted as \({X_t} t∈Γ\), where each \(X_t\), maps outcomes from a probability space to real values, representing the evolution of a random system over time. It traces a trajectory or sample path, showing how the system behaves over time under that specific model. In practice, especially for simulation and numerical analysis, continuous-time processes are often approximated by evaluating them at finitely many discrete time points—called a time grid—which discretizes the interval into “grid-points”, turning the process into a sequence of values at those specific times.
Trajectory or path of the random process, seen as a function of time. A trajectory represents the dynamic evolution of the stochastic process in time.
We divide this family of processes and variables along 2 axis and 4 categories
| State space | |||
| discrete | continous | ||
| Time | discrete | Random walks Markov Chains | ARIMA, GARCH |
| continous | Poisson | Lévy Wiener Diffusion SDE |
Continuous time and discrete time can be evaluated at any point in time vs a discrete model that only allows evaluation at only specified times
this is not possible in a practical and computational meaning, if you zoom infinitely you will always find space between the points.
So we include the concept of grid points (/time discretization points) allowing us to turn a continuous function to a discrete function \(0→T\) divided into 100 points means that my “continuous time function” is discrete in 100 points
2.1 Wiener process and basic Brownian motion
We use the notation \(W_t\) or \(B_t\) interchangeably
Brownian motion (or Wiener process) is a stochastic process \((B_t,t ≥ 0)\) starting from \(0\) at \(t_0\), i.e. \(B0 = 0\), with the following properties:
Independent increments What happens from one discretization point to the next is all independent (yesterday doesn’t influence tomorrow)
\(B_t − B_s\) and \(B_u − B_v\) with \(t,s,u,v\) being different points in time, independent for \((s,t) ∩ (v, u) = ∅\)
stationary increments But the distribution assumption doesn’t change \(dB = B_t-B_s\) depends only on \(t − s\) and not by \(t\) or \(s\) separately
Gaussian increments — the most known and most basic property: \(dB \sim N(0,t - s)\) and \(B_t \sim N(0,t)\)
It requires first choosing your grid points: take the maturity time \(T\) and divide it into as many steps \(N\) as you want, so that the step size is \(\Delta t = T/N\) and the x-axis runs from 0 to \(T\).
\[ W(t_i) = W(t_{i−1}) + z ·\sqrt{∆t} \]
Very common question: Is the Wiener process Markovian and a martingale?
these concepts will be introduced later…
Always markovian since you only take the last value, and a martingale since the average of the drift starting from a standard normal distribution is of mean 0
Show code
N <- 100 # number of end-points of the grid including Time
Time <- 1 # length of the interval [0, Time] in time units
Delta <- Time/N # time increment (dt)
W <- numeric(N + 1) # initialization of the vector W
Time <- seq(0, 1, length = N)
for (i in 2:(N + 1)) {
W[i] <- W[i - 1] + rnorm(1) * sqrt(Delta)
}
quickplot(W, title = "Simulation of Wiener processes", show_legend = F, x_size = N,
linewidth = 1)Different ways to make it more efficient using system.time to get the accurate time of the ways to compute, since microbenchmark gives a weird result sometimes, this means that from now on I will be using BM.vec to get the Brownian motion
Show code
set.seed(123)
# brutal code
BM.1 <- function(N = 10000) {
    # "Brutal" Brownian-motion sampler, kept deliberately inefficient for the
    # timing comparison with BM.2/BM.vec: the path is grown one element at a
    # time with c(), which copies the whole vector each iteration (O(N^2)).
    #
    # Fix: the original appended raw increments rnorm(1)/sqrt(N) without ever
    # cumulating them, so it returned iid scaled normals rather than a
    # Brownian path. The increments are now summed, matching BM.2: a path of
    # length N starting at 0 with N(0, 1/N) increments.
    W <- 0
    for (i in 2:N) W <- c(W, W[i - 1] + rnorm(1)/sqrt(N))
    return(W)
}
system.time(BM.1()) user system elapsed
0.16 0.00 0.18
Show code
set.seed(123)
# smarter
BM.2 <- function(N = 10000) {
    # Preallocated Brownian-motion sampler: all N - 1 standard-normal draws
    # are taken in one call, then accumulated in place. Returns a path of
    # length N starting at 0, with increments scaled by 1/sqrt(N).
    path <- numeric(N)
    draws <- rnorm(N - 1)
    for (step in 2:N) {
        path[step] <- path[step - 1] + draws[step - 1]/sqrt(N)
    }
    return(path)
}
system.time(BM.2()) user system elapsed
0.02 0.00 0.02
Show code
set.seed(123)
# awesome!
BM.vec <- function(N = 10000) {
    # Fully vectorised Brownian-motion sampler: draw all N - 1 increments at
    # once, scale them so each has variance 1/(N - 1), and cumulate with a
    # single cumsum. Returns a path of length N starting at 0.
    increments <- rnorm(N - 1)/sqrt(N - 1)
    return(c(0, cumsum(increments)))
}
system.time(BM.vec()) user system elapsed
0 0 0
It's not differentiable, so you compute the limit by hand: take two points, one at \(t=a\) and the next at \(t=a+\Delta t\), and let \(\Delta t \to 0\); the limit diverges to \(+\infty\).
\[ \lim_{∆t→0} \frac{|W (a + ∆t) − W (a)|}{∆t} ≃ \lim_{∆t→0} \frac{|\sqrt{∆t}|}{∆t}=+∞ \]
Show code
dt <- seq(from = 0, to = 0.01, length.out = 100)
a <- 1
quickplot(abs((rnorm(1) * sqrt(a + dt)) - (rnorm(1) * sqrt(a)))/dt, title = "Differential limit in 0 goes to infinity",
linewidth = 1, show_legend = F)Which as we said before we said that the lim is infinity, it means that the stochastic process you will not be using the component in of itself but you will use the integral (integral of the derivative cancel each other out and allows the process to not go to infinity)
A stochastic differential equation models the noise (or the stochastic part) of this system by adding the variation of some stochastic process to the above dynamics, e.g. the Wiener process
Common question: What are the 2 components of a stochastic differential equation?
deterministic trend + stochastic noise
\[ X_t=X_0+\int_{0}^{t}\Big(b(X_S)ds\Big)+\int_{0}^{t}\Big(\sigma\left(X_S\right)dW_S\Big) \]
2.2 Geometric Brownian motion
Why is the Wiener process insufficient?
You can't use the Wiener process to simulate a stock path: it can go negative, which stocks can't; it starts from 0; and it has no trend. So, we need a stochastic differential equation.
A stochastic differential equation models the noise (or the stochastic part) of this system by adding the variation of some stochastic process to the above dynamics, e.g. the Wiener process, also it can go below \(S_0\) but not below \(0\)
\[ \frac{X_{t+dt} − X_t}{dt} = \frac{dX_t}{dt}= b(X_t) \]
\[ dX_t = b(X_t)dt + σ(X_t)dW_t \]
Evolution through time \(b(X_t)dt\) Deterministic trend \(σ(X_t)\) Stochastic noise \(dW_t\)
Differential because it is based on increments derivatives Which as we said before we said that the lim is infinity, it means that the stochastic process you will not be using the component in of itself but you will use the integral (integral of the derivative cancel each other out and allows the process to not go to infinity)
Common question: How is B&S modelled and what is the closed form solution?
In the Black & Scholes theory of option pricing, we model an underlying asset using a stochastic process St called geometric Brownian motion which satisfies the stochastic differential equation
\[ dS_t = µS_tdt + σS_tdW \]
Finally giving us the final closed formula solution
\[ S_t=x_0\cdot EXP\bigg\{\bigg(r-\frac{\sigma^2}{2}\bigg)t+σW_t \bigg\} \ \ \ \ \ \ t>0 \ \ \ \ \ \ S_0=x \]
With all time steps all normally distributed, With no skew or kurtosis, so no spikes in the movement
No spike component means that it will trend to the drift component in the long run
Show code
r <- 1
sigma <- 0.5
x <- 10
N <- 100 # number of grid points including maturity Time
Time <- 1 # length of the interval [0, Time] in time units
Delta <- Time/N # time increment (dt)
W <- numeric(N + 1) # initialization of the vector W
Time <- seq(0, Time, length = N + 1)
for (i in 2:(N + 1)) {
W[i] <- W[i - 1] + rnorm(1) * sqrt(Delta)
}
S <- x * exp((r - sigma^2/2) * Time + sigma * W)
quickplot(data.frame(StockPath = S), title = "geometric Brownian motion", x_size = N,
linewidth = 1)Show code
GBM.vec <- function(N = 100, x = 10, r = 1, sigma = 0.5, Time = 1) {
    # Simulates one geometric-Brownian-motion path on a grid of N + 1 points
    # spanning [0, Time], using the closed-form solution
    #   S_t = x * exp((r - sigma^2/2) * t + sigma * W_t).
    # The Wiener path is built with one rnorm(1) draw per step so that the
    # RNG stream matches step-by-step simulation under a fixed seed.
    dt <- Time/N                        # grid spacing (Delta t)
    wiener <- numeric(N + 1)            # Wiener path, wiener[1] = 0
    for (k in 2:(N + 1)) {
        wiener[k] <- wiener[k - 1] + rnorm(1) * sqrt(dt)
    }
    grid <- seq(0, Time, length = N + 1)
    return(x * exp((r - sigma^2/2) * grid + sigma * wiener))
}
replicate(10, GBM.vec()) %>%
quickplot("Montecarlo simulation of geometric brownian motion", x_size = 100,
show_legend = F)2.3 Variance reduction techniques
The most basic approach is to use antithetic sampling.
It is based on the fact that drawing half of the samples from the positive side and half from the negative side leaves the distribution unchanged (for example, if X is Gaussian, then −X is Gaussian as well).
Instead of simulating 1000 stock paths, I get only 500 from f(x) and the rest from f(-x) Which is the Antithetic part
Balancing back a outlier in the distribution with an equal number of simulations from the other side of the distribution, making it equally probable that I pick the same amounts of outliers
This only works when the distribution has the same distributional assumptions for \(X\) and \(-X\)
approach from slides
\(y_1\) is computed using x, while \(y_2\) is computed using -x (its antithetic counterpart).
The function being evaluated resembles a payoff function (e.g., a put option with an exponential term).
The idea is that if \(x\) produces an extreme value in one direction, \(−x\) produces an opposite (but compensating) value, making the estimate more stable.
Compared to a theoretical benchmark an analytic formula for the expectation being estimated. Pricing formula for a European-style option
Show code
n <- 1000     # number of Monte Carlo draws
beta <- 1     # scale of the exponent in the payoff
K <- 1        # strike-like constant of the put-style payoff
x <- rnorm(n) # standard normal draws
# Payoff max(0, K - exp(beta * x)) for each draw and for its antithetic
# counterpart -x. The expression is elementwise, so the vectorised pmax()
# replaces the per-element sapply() loops with identical results.
y1 <- pmax(0, K - exp(beta * x))
y2 <- pmax(0, K - exp(-beta * x))
mean((y1 + y2)/2) # MC with antithetic sampling[1] 0.2442896
Show code
K * pnorm(log(K)/beta) - exp(beta^2/2) * pnorm(log(K)/beta - beta)[1] 0.2384217
Graphical approach using geometric brownian motion, with the 2 equations being: A normal geometric brownian motion
\[ S_t = x_0\cdot \exp\bigg\{\bigg(r-\frac{\sigma^2}{2}\bigg)t+\sigma W_t \bigg\} \qquad t>0 \]
And an inverse geometric brownian motion (\(-\sigma W_t\))
\[ S_t = x_0\cdot \exp\bigg\{\bigg(r-\frac{\sigma^2}{2}\bigg)t-\sigma W_t \bigg\} \qquad t>0 \]
Show code
# another function with
GBM.vec_inv <- function(N = 100, x = 10, r = 1, sigma = 0.5, Time = 1) {
  # Antithetic counterpart of GBM.vec(): the same geometric Brownian motion
  # but driven by the mirrored noise, S_t = x * exp((r - sigma^2/2) t - sigma W_t).
  #
  # Fix: the original used Delta <- Time/(N + 1) with only N grid points
  # (W <- numeric(N), seq(..., length = N), loop 2:N), which is inconsistent
  # with GBM.vec() (N + 1 points, dt = Time/N). The two "paired" paths then
  # had different lengths and time steps, and downstream code indexing row N
  # compared non-terminal against terminal values. The grid now matches
  # GBM.vec() exactly; only the sign of the diffusion term differs.
  #
  # Returns a numeric vector of length N + 1 with the simulated path.
  Delta <- Time/N  # time increment (dt), same as GBM.vec()
  W <- c(0, cumsum(rnorm(N) * sqrt(Delta)))  # Brownian path, W_0 = 0
  tgrid <- seq(0, Time, length.out = N + 1)
  x * exp((r - sigma^2/2) * tgrid - sigma * W)
}
N <- 100 # number of grid points including maturity Time
M <- 1000 # number of simulations (min 3)
x <- 10
r <- 1
sigma <- 0.5
Time <- 1
norm <- replicate(M, GBM.vec(N = N, x = x, r = r, sigma = sigma, Time = Time))
inv <- replicate(M, GBM.vec_inv(N = N, x = x, r = r, sigma = sigma, Time = Time))
long_norm <- melt(norm[, seq(from = 1, to = min(50, M), by = 2)]) %>%
mutate(Var1 = Var1/N)
long_inv <- melt(inv[, seq(from = 1, to = min(50, M), by = 2)]) %>%
mutate(Var1 = Var1/N)
ggplot() + geom_line(data = long_norm, aes(x = Var1, y = value, group = Var2, colour = "Normal"),
linewidth = 1) + geom_line(data = long_inv, aes(x = Var1, y = value, group = Var2,
colour = "Inverted"), linewidth = 1) + scale_color_manual(values = c(Normal = "darkblue",
Inverted = "darkred")) + labs(title = ifelse(M > 50, paste("50 simulations out of",
M, "total"), paste(M, "simulations")), subtitle = "Color coded split half normal half inverted",
x = "time", y = "Value of GBM")Show code
other_half_no_VRT <- replicate(M/2, GBM.vec(N = N, x = x, r = r, sigma = sigma, Time = Time))[N,
]
sim_no_VRT <- c(other_half_no_VRT, norm[N, ])
df <- data.frame(Category = c("BASE NORM", "BASE INV", "VRT", "NO VRT", "BASE OTHER HALF"),
Variance = c(var(norm[N, ]), var(inv[N, ]), var(c(norm[N, ], inv[N, ])), var(sim_no_VRT),
var(other_half_no_VRT)))
ggplot(df, aes(x = Category, y = Variance, fill = Category)) + geom_bar(stat = "identity") +
labs(title = "Variance Comparison", x = "Category", y = "Variance") + theme(legend.position = "none") +
coord_cartesian(ylim = c(min(df$Variance) - 10, max(df$Variance) + 10)) # Zoom without data lossNORM variance: 204.13
INV variance: 207.47
Variance with VRT of my simulations: 205.76
Variance no VRT, just the normal simulations and another M=500 simulations: 210.2
There is no guaranteed way to reduce variance: it only happens when the antithetic part actually reduces it. The antithetic draws could contain an extreme far bigger than the regular simulations and create more variance.
Therefore, we have a variance reduction if \[ Var \frac12 \bigg(g(X) + g\Big(h(X)\Big)\bigg) < Var\Big(g(X)\Big) \]
2.3.1 Other examples of common stochastic processes in their closed formula
Geometric Brownian motion (gBm) \(dX_t = µX_t d_t + σX_t dW_t\) Standard model for asset prices in Black-Scholes, lacks mean reversion.
Cox-Ingersoll-Ross (CIR) \(dX_t = (θ_1 + θ_2X_t)dt + θ_3 \sqrt{X_t}dW_t\) Mean reverting property variation according to wiener process but mean reverting in \(\lim(t)\) Ensures non-negative interest rates due to the square root term
Chan-Karolyi-Longstaff-Sanders (CKLS) \(dX_t = (θ_1 + θ_2X_t)dt + θ_3X^{θ_4}_t dW_t\) mean reverting but with 2 variation component allowing more stray from trend Still no spikes component outright Generalizes CIR by allowing more flexible volatility behavior.
Nonlinear mean reversion (Ait-Sahalia) \(dX_t = (α_{−1} X^{−1}_t + α_0 + α_1 X_t + α_2 X^2_t)dt + β_1X^ρ_t dW_t\) Can capture extreme deviations from equilibrium due to nonlinear drift.
Double Well potential (bimodal behavior, highly nonlinear) \(dX_t = (X_t – X^3_t)dt + dW_t\) System fluctuates between two stable states, useful for modeling regime shifts.
Jacobi diffusion (political polarization) \(dX_t = −θ\left(X_t - \frac12\right)dt +\sqrt{θX_t(1 – X_t)}dW_t\) Constrains values to the interval (0,1), suitable for proportions like voting shares.
Ornstein-Uhlenbeck (OU) \(dX_t = θX_tdt + dW_t\) Also mean reverting also used for interest rates Widely used for modeling interest rates and volatility.
Radial Ornstein-Uhlenbeck \(dX_t = (θX^{−1}_t - X_t)dt + dW_t\) Ensures positive values, often used in stochastic volatility models.
2.3.2 SDE package
The sde package allows simulating and working with stochastic differential equations. The main function is sde.sim; one of its main features is the model argument, which allows multiple complex models to be used: CIR (Cox-Ingersoll-Ross), VAS (Vasicek), OU (Ornstein-Uhlenbeck), BS (geometric Brownian motion, used for Black and Scholes).
Show code
n <- 100 # number of simulation steps.
Delta <- 0.01 # time step of the simulation
S0 <- 10 # fix initial value
mu <- 0.1 # fix the drift value
sigma <- 0.2 # and one for the volatility
## theta = vector of parameters for cdist
sde.sim(X0 = S0, N = n, delta = Delta, model = "BS", theta = c(mu, sigma)) %>%
quickplot(title = "Geometric Brownian motion", show_legend = F)Show code
sde.sim(X0 = S0, N = n, M = 10, delta = Delta, model = "BS", theta = c(mu, sigma)) %>%
quickplot("Montecarlo simulation of SDE Brownian motion", subtitle = "`M` allows to simulate more than one path",
show_legend = F)2.3.3 Markov property
The Markov property is based on the filtration principle captures how much past conditional information a function has, as processes are backward-looking. For a 1-year filtration, each point has the past 252 trading days’ data.
A stochastic process is Markovian if the conditional expectation of future values depends only on the current state, not the entire history: \[ E(X_n|F_{n-1}) = E(X_n|X_{n-1}, X_{n-2},\dots)= E(X_n|X_{n-1}) \]
A discrete-time process \(\{X_n, n ≥ 1\}\) is Markovian if the conditional distribution of \(X_n\) given the past equals its distribution given only \(X_{n-1}\). Past values provide no additional information.
The Wiener process and geometric Brownian motion (gBm) are Markov processes—their closed-form solutions depend only on \(x_0\). However, this lack of memory limits their ability to capture past patterns, as the gBm SDE doesn’t use historical data after estimating parameters.
In short, for a Markov process, the entire history is equivalent to just the last data point.
2.3.4 Martingale
A martingale is a stochastic process where the expected future value, given all past information, equals the current value. Formally, for a discrete-time process \(\{X_n\}\):
\[ E(X_{n+1}|X_1,\dots,X_n)=X_n \]
If \(E(X_{n+1}|X_1, ..., X_n) ≥ X_n\) it is a submartingale. positive drift
If \(E(X_{n+1}|X_1, ..., Xn) ≤ X_n\) it is a supermartingale. negative drift
Geometric Brownian Motion (gBm) is a martingale only when the drift parameter is zero. Otherwise:
This property reflects “fair” evolution (martingale), upward bias (submartingale), or downward bias (supermartingale).
Basically what is the drift, a true martingale is when the drift component is 0
Common question: Is this a martingale and a Markovian process?
In order for a sde with more than 2 components to be a martingale either they are all 0 or cancel each other out
| Model | Martingale? | Markov? |
|---|---|---|
| Wiener process (W) | Yes \(E[N(0,1)]=0\)) | ✅ Yes |
| Geometric Brownian Motion (gBm) | Only if drift = 0 (μ=0) | ✅ Yes |
| Cox-Ingersoll-Ross (CIR) | Can be \(-θ_1=θ_2 \ \text{or} \ θ_1=θ_2=0\) | ✅ Yes |
| CKLS (Chan-Karolyi-Longstaff-Sanders) | No | ✅ Yes |
| Ornstein-Uhlenbeck (OU) | Can be if driftless | ✅ Yes |
| Variance Gamma (VG) | Can be if drift compensated \(|b|=|a|\) | ✅ Yes |
| Meixner | Can be if symmetric | ✅ Yes |
2.3.5 Discretization / Simulation strategies
they are all algorithms to estimate the values according to sde
3 types of discretization methods in sde.sim (From most narrow to most broad):
Exact sampling
EA(GBM) if you know everything about the model and use either reverse or acceptance rejection methodConditional distribution
cdist(CIR OU GBM) Get the distribution starting from the starting value, since all are markovian once you have simulated all the values look at today, get the distribution conditional on the value of todayDiscretization methods (Every other sde) take your function with the integral since there are no closed for solutions, solving both component numerically, least efficient so if you can use the others
Discretization methods:
euler (common), milstein (also common), KPS, milstein2, ozaki, shoji. Discretization just means taking the integral form of the SDE and solving it numerically at every time step.
\[ S_{t + dt} = S_t + \int_t^{t+dt} \mu (S_u,u)du + \int_t^{t+dt} \sigma (S_u,u)dWu \]
Essentially this is 2 big integrals for mu and sigma
All of these can be used and to get the best result is to try many of them Empirically the only difference is that the Euler is just slower than the Milstein
The trend component in the process can be estimated with historical data and the estimate can be used to figure out the direction of the path
In levy processes there is another component with the jump so a third integral
Commmon question: What is the difference between Millstein and Euler?
2 discretization methods it just means that you are doing the integral of the sde and you need to solve it for every time step. All of these can be used and to get the best result is to try many of them. Empirically the only difference is that the Euler is just slower than the Milstein
Show code
set.seed(1222)
Time <- 2
N <- 100
dt <- Time/N
X0 <- 40
mu_v <- 0.05
sigma_v <- 0.2
r <- 0.05
K <- 3
# Simulate GBM paths using different methods
gbm_euler <- sde.sim(X0 = X0, N = N, model = "BS", theta = c(mu_v, sigma_v), method = "euler")
gbm_milstein <- sde.sim(X0 = X0, N = N, model = "BS", theta = c(mu_v, sigma_v), method = "milstein")
gbm_exact <- sde.sim(X0 = X0, N = N, model = "BS", theta = c(mu_v, sigma_v), method = "EA")
# Plot paths
data.frame(euler = gbm_euler, milstein = gbm_milstein, exact = gbm_exact) %>%
quickplot()Show code
# dirty visualization on deliberately few paths to better see the convergence
nsim <- 20
euler_option <- mean(pmax(0, last(replicate(nsim, sde.sim(X0 = X0, N = 1, T = Time,
model = "BS", theta = c(mu_v, sigma_v), method = "euler"))) - K)) * exp(-r *
Time)
milstein_option <- mean(pmax(0, last(replicate(nsim, sde.sim(X0 = X0, N = 1, T = Time,
model = "BS", theta = c(mu_v, sigma_v), method = "milstein"))) - K)) * exp(-r *
Time)
EA_option <- mean(pmax(0, last(replicate(nsim, sde.sim(X0 = X0, N = 1, T = Time,
model = "BS", theta = c(mu_v, sigma_v), method = "EA"))) - K)) * exp(-r * Time)
BS_ref_option <- black_scholes(S = X0, K = K, Time = Time, r = r, sigma = sigma_v,
type = "call")
data.frame(euler = c(euler_option, euler_option - BS_ref_option, abs(euler_option -
BS_ref_option)), milstein = c(milstein_option, milstein_option - BS_ref_option,
abs(milstein_option - BS_ref_option)), EA = c(EA_option, EA_option - BS_ref_option,
abs(EA_option - BS_ref_option)), BS = c(BS_ref_option, NA, NA)) %>%
round(4) %>%
set_rownames(c("Price", "Diff with BS", "Abs diff")) %>%
gt(rownames_to_stub = T) %>%
data_color(direction = "row", rows = 3, na_color = "white", colors = c("green",
"gainsboro", "red"), alpha = 0.2, autocolor_text = F) %>%
tab_header("Price and difference with the BS function", subtitle = paste0("Deliberately few paths (",
nsim, ") to better see the convergence"))| Price and difference with the BS function | ||||
| Deliberately few paths (20) to better see the convergence | ||||
| euler | milstein | EA | BS | |
|---|---|---|---|---|
| Price | 39.9877 | 35.4608 | 37.5214 | 37.2855 |
| Diff with BS | 2.7022 | -1.8247 | 0.2359 | NA |
| Abs diff | 2.7022 | 1.8247 | 0.2359 | NA |
If your function is not one of the ones done in sde you can create 2 expressions one for the drift and one for the variance OU process, \(dXt = −5X_tdt + 3.5dW_t\)
The package uses by default the best method for the model if it is specified
Show code
d <- expression(-5 * x)
s <- expression(3.5)
X <- sde.sim(X0 = 10, drift = d, sigma = s)sigma.x not provided, attempting symbolic derivation.
Show code
quickplot(sde.sim(X0 = 10, drift = d, sigma = s, M = 100, ), title = "OU process user defined",
show_legend = F)sigma.x not provided, attempting symbolic derivation.
CIR model \(dXt = (6 − 3X_t)dt + 2\sqrt{X_t}dW_t\) via model name or, via exact conditional distribution rcCIR (also implemented in sde)
Show code
d <- expression(6 - 3 * x)
s <- expression(2 * sqrt(x))
X <- sde.sim(X0 = 10, drift = d, sigma = s)sigma.x not provided, attempting symbolic derivation.
Show code
quickplot(X, title = "User-defined CIR")Show code
set.seed(123)
X2 <- sde.sim(X0 = 10, theta = c(6, 3, 2), model = "CIR", rcdist = rcCIR, method = "cdist",
M = 1000)
set.seed(123)
X_many <- sde.sim(X0 = 10, drift = d, sigma = s, M = 1000)sigma.x not provided, attempting symbolic derivation.
Show code
X2_mean <- apply(X2, 1, mean)
X_many_mean <- apply(X_many, 1, mean)
check_acc(X2_mean, X_many_mean, title = "hand-made CIR and sde CIR across 1000 simulations")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Show code
grid.arrange(ncol = 2, top = "100 simulations of both models", X2[, 1:100] %>%
quickplot(title = "Using model = \"CIR\"", show_legend = F), X_many[, 1:100] %>%
quickplot(title = "Defining expressions", show_legend = F))3 Time series analysis
quantmod is the industry standard of downloading financial data, the main provider is yahoo and uses the xts format which means that the data in actually indexed
Show code
Sq <- getSymbols("AAPL", from = "2015-02-17", to = "2025-02-17", auto.assign = F)
show_df(Sq, rounding = 2) %>%
gt() %>%
sub_missing(columns = everything(), missing_text = "⋮") %>%
tab_header(title = "Apple dataset") %>%
opt_stylize(style = 5, add_row_striping = TRUE)| Apple dataset | ||||||
| DATE | AAPL.Open | AAPL.High | AAPL.Low | AAPL.Close | AAPL.Volume | AAPL.Adjusted |
|---|---|---|---|---|---|---|
| 2015-02-17 | 31.87 | 32.22 | 31.73 | 31.96 | 252609600 | 28.51 |
| 2015-02-18 | 31.91 | 32.19 | 31.86 | 32.18 | 179566800 | 28.71 |
| 2015-02-19 | 32.12 | 32.26 | 32.08 | 32.11 | 149449600 | 28.65 |
| 2015-02-20 | 32.15 | 32.38 | 32.01 | 32.38 | 195793600 | 28.88 |
| 2015-02-23 | 32.51 | 33.25 | 32.42 | 33.25 | 283896400 | 29.66 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2025-02-10 | 229.57 | 230.59 | 227.20 | 227.65 | 33115600 | 227.35 |
| 2025-02-11 | 228.20 | 235.23 | 228.13 | 232.62 | 53718400 | 232.32 |
| 2025-02-12 | 231.20 | 236.96 | 230.68 | 236.87 | 45243300 | 236.56 |
| 2025-02-13 | 236.91 | 242.34 | 235.57 | 241.53 | 53614100 | 241.21 |
| 2025-02-14 | 241.25 | 245.55 | 240.99 | 244.60 | 40896200 | 244.28 |
Show code
paste("This data was downloadded from", attr(Sq, "src"))[1] "This data was downloadded from yahoo"
Show code
quickplot(Cl(Sq), title = "AAPL close price")from the tseries package it allows you to download data from many providers most importantly FRED(federal reserve databank) kind of like quantmod which is the industry standard though fImport is pretty much the same
Show code
S <- get.hist.quote("AAPL", start = "2010-01-01", end = "2022-01-01")time series starts 2010-01-04
time series ends 2021-12-31
Show code
chartSeries(S, TA = c(addVo(), addBBands()), theme = "white")Show code
S <- S$Close3.1 Log-returns
The time series is better suited to capture statistical properties in relative terms, rather than being affected by the size of the stock
Log returns normalize stock price movements, and allow for direct comparison between different stocks, regardless of their price level
Stock prices themselves aren’t statistically comparable, so you need to use log returns. There is a variety of ways to compute them; here we use properties of logarithms, where the ratio of consecutive prices is obtained by taking the successive differences of the logarithms of the stock prices. na.omit simply skips the days with no recorded data, whether due to technical issues or anything else.
Show code
X <- diff(log(S))
plot(X)Show code
X <- na.omit(X)3.2 Change-point analysis
Volatility change-point estimator for diffusion processes based on least squares. in this case we can see that the price dynamics have changed during covid
Change point detection is used to check for biased data
Commmon question: Why and how do you use cpoint?
Change point detection (cpoint) in time series analysis is used to identify moments where the statistical properties of a series—such as the mean, variance, or correlation shift significantly.
The function highlights the exact moment when a structural break occurred. Data before the change point should not be used, as it does not reflect the stock’s current behavior.
This is especially important in fields like finance or economics, where events like financial crises, policy changes, or market shocks can introduce structural breaks that violate model assumptions like stationarity. Detecting these points allows analysts to segment the data for more accurate modeling, improve forecast reliability, and better understand the underlying dynamics or regime shifts in the data.
Show code
cp <- cpoint(S)
bprint(cp)k0 = 2532
tau0 = 18285
theta1 = 0.4877375
theta2 = 2.451911
Show code
# Draw a vertical red marker on the current chartSeries plot at each supplied
# date/time in `dtlist` (e.g. the tau0 returned by cpoint()). addTA() overlays
# a TRUE indicator on panel 1 (on = 1) at those timestamps.
# NOTE(review): relies on the quantmod/xts packages being attached and on an
# active chartSeries device — confirm before reusing elsewhere.
addVLine <- function(dtlist) {
  plot(addTA(xts(rep(TRUE, NROW(dtlist)), dtlist), on = 1, col = "red"))
}
addVLine(cpoint(S)$tau0)Show code
## Log-returns with change-point line
S <- as.numeric(S)
n <- length(S)
X <- log(S[-1]/S[-n])
plot(X, type = "l", main = "Apple stock log-returns")
abline(v = cpoint(X)$tau0, col = "red")4 Parameter estimation
Maximum likelihood estimation (MLE): uses the entire density function
Quasi-MLE (QMLE): uses a simplified version of the MLE
Method of moments: uses a few moments
4.1 MLE maximum likelihood estimation
Objective: Estimate unknown parameter(s) \(θ\) by maximizing the likelihood of observing the given data.
- Likelihood Function Discrete case:
\[ ℓ(θ)=L_n(θ)=\prod_{i=1}^n p(x_i;θ)\quad (PMF) \]
\(L_n (θ)\) is not a probability but a measure of how likely \(θ\) is given the data. \(L_n (θ)\) (likelihood of θ) from P(data) probability of data
Log-Likelihood transforms products into sums for computational ease: Derivatives of sums are simpler than derivatives of products \[ LogL_n(\theta) = \sum_{i=1}^n log\ p (x_i;\theta) \]
Log-returns \(r_t = log(P_t/P_{t−1)}\) are often modeled as \(N(μ,σ^2)\)
Show code
cbind(dnorm(-1000:1000/100, mean = 4, sd = 3), dnorm(-1000:1000/100, mean = 2, sd = 3)) %>%
quickplot() + geom_vline(aes(xintercept = 500)) + labs(title = "MLE", subtitle = "vertical line is the unkonwn mean of my data, values increase as mu decreases")Show code
cbind(dnorm(-400:400/100, mean = 1, sd = 1)) %>%
quickplot(xlab = NULL) + geom_hline(aes(yintercept = 0.1)) + geom_hline(aes(yintercept = 0.2)) +
geom_hline(aes(yintercept = 0.3)) + geom_hline(aes(yintercept = 0.5), color = "red") +
geom_hline(aes(yintercept = max(dnorm(-400:400/100, mean = 1, sd = 1))), color = "green") +
labs(title = "Iterative process of trying to find the highest value", subtitle = "the likelihood keeps going until it overshoots and then comes back down")lets say that the function is messier, you need limits optim methods and starting values in order to not fall into a local maximum and not the global maxima
Show code
c(SMA(rnorm(801, sd = 20)/100, n = 50) + dnorm(-400:400/100, mean = 0, sd = 1)) %>%
quickplot(title = "Messy distribution with many obvious local maxima", x_start = -4,
x_size = 101, show_legend = F)Now lets start with a function that we know the distribution assumption of \(N (5,2)\)
using stats4 package take the density function called whatever i want The mle function actually minimizes the negative log-likelihood \(−ℓ(θ)\) as a function of the parameter \(θ\) where \(ℓ(θ) = log L(θ)\).
solve this procedure numerically (/iteratively) instead of starting values wherever you can already give it some starting values of the process, give also an upper value and a lower value to narrow the search down
Commmon question: How to ensure that the value will converge?
function(mu = NUM, sigma = NUM)these are the starting values, if you have an idea of where the parameters of the density islower = c(0, 0) , upper = c(Inf, Inf)set limits for the estimation, its not much use if the correct values are outside the boundaries but its way too much calculations if the boundaries are too largemethod = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent")choose the correct method of optimization
Commmon question: What does the method do?
The nonlinear optimization methods pick different combinations of the parameters (in this case mu and sigma) to solve the likelihood function. Methods are algorithms which tell R which combinations to use to calculate the log-likelihood; there isn't one best method, so the choice depends on the shape of the distributional assumptions. mle is based on optim, which has 6 optimization methods in it.
Show code
# using the mle function
n <- 1000
xnorm <- rnorm(n, mean = 6, sd = 2)
log.lik <- function(mu = 2, sigma = 2) {
  # Negative log-likelihood of the sample `xnorm` (looked up in the enclosing
  # environment) under a N(mu, sigma^2) model. mle() minimises this function,
  # so the sum of pointwise log-densities is returned with its sign flipped.
  # The argument defaults double as the optimiser's starting values.
  pointwise <- dnorm(xnorm, mean = mu, sd = sigma, log = TRUE)
  -sum(pointwise)
}
fit <- mle(log.lik, lower = c(0, 0), upper = c(Inf, Inf), method = "L-BFGS-B")
summary(fit)Maximum likelihood estimation
Call:
mle(minuslogl = log.lik, method = "L-BFGS-B", lower = c(0, 0),
upper = c(Inf, Inf))
Coefficients:
Estimate Std. Error
mu 5.980428 0.06360089
sigma 2.011237 0.04497286
-2 log L: 4235.368
Show code
logLik(fit)'log Lik.' -2117.684 (df=2)
Show code
confint(fit)Profiling...
2.5 % 97.5 %
mu 5.855657 6.105207
sigma 1.926206 2.102703
Show code
check_acc(data1 = sort(xnorm), data2 = sort(rnorm(n, mean = coef(fit)[1], coef(fit)[2])),
title = "Sorted values to confront distributional assumptions")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Show code
data.frame(LR_sort = xnorm, norm_MLE = rnorm(n, mean = coef(fit)[1], coef(fit)[2])) %>%
ggplot() + geom_histogram(aes(x = LR_sort, fill = "Norm"), alpha = 0.8, bins = 80) +
geom_histogram(aes(x = norm_MLE, fill = "norm_MLE"), alpha = 0.8, bins = 80) +
labs(title = "Distribution charts of both the real and the simulated data", x = NULL) +
scale_fill_manual(name = NULL, values = c(Norm = "indianred3", norm_MLE = "olivedrab2")) +
theme(legend.position = "bottom")MLE fails bc the stock doesnt follow the GbM may be used with correlated regressors and data
Show code
# Delta <- 0.01 #FROM BEFORE # time step of the simulation
x <- na.omit(diff(log(S)))
LR_S <- x
est.log.lik <- function(mu_v = 1, sigma_v = 1) {
  # Negative log-likelihood of the log-returns `x` (from the enclosing
  # environment) under the GBM model, where log-returns are normal with
  # mean mu_v - sigma_v^2 / 2 and standard deviation sigma_v.
  drift_adj <- mu_v - 0.5 * sigma_v^2  # GBM mean of the log-returns
  loglik <- dnorm(x, mean = drift_adj, sd = sigma_v, log = TRUE)
  -sum(loglik)
}
fit <- mle(est.log.lik, method = "L-BFGS-B", lower = c(0, 1e-06), upper = c(1, 2))
summary(fit)Maximum likelihood estimation
Call:
mle(minuslogl = est.log.lik, method = "L-BFGS-B", lower = c(0,
1e-06), upper = c(1, 2))
Coefficients:
Estimate Std. Error
mu_v 0.00119893 0.0003228334
sigma_v 0.01773977 0.0002258959
-2 log L: -15798.55
Show code
logLik(fit)'log Lik.' 7899.273 (df=2)
Show code
confint(fit)Profiling...
2.5 % 97.5 %
mu_v 0.0005662226 0.001831613
sigma_v 0.0172535788 0.018150722
Show code
check_acc(data1 = sort(as.numeric(x)), data2 = sort(rnorm(length(x), mean = coef(fit)[1],
coef(fit)[2])), title = "Sorted values to confront distributional assumptions")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Show code
data.frame(LR_sort = as.numeric(x), MLE = rnorm(length(x), mean = coef(fit)[1], sd = coef(fit)[2])) %>%
ggplot() + geom_histogram(aes(x = LR_sort, fill = "Log returns"), alpha = 0.8,
bins = 80) + geom_histogram(aes(x = MLE, fill = "MLE"), alpha = 0.8, bins = 80) +
labs(title = "Distribution charts of our real data and the one that we", x = NULL) +
scale_fill_manual(name = NULL, values = c(`Log returns` = "indianred3", MLE = "lightblue")) +
theme(legend.position = "bottom")Show code
log_returns <- x
mu_prime <- mean(log_returns)
sigma_sq <- mean((log_returns - mu_prime)^2) # MLE variance (divided by n)
sigma <- sqrt(sigma_sq)
mu <- mu_prime + 0.5 * sigma_sq # MLE drift parameter
cat("Closed-form MLE Estimates:\n", "mu =", mu, "\n", "sigma =", sigma, "\n")Closed-form MLE Estimates:
mu = 0.001198089
sigma = 0.01769278
Show code
NLL <- function(mu, sigma) {
  # Negative log-likelihood for GBM log-returns: under GBM with drift `mu`
  # and volatility `sigma`, the log-returns are N(mu - sigma^2/2, sigma^2).
  # Reads `log_returns` from the enclosing environment; minimised by mle().
  # Fix: the local variable was named `mean`, shadowing base::mean() inside
  # this scope — renamed to avoid the masking anti-pattern.
  mean_lr <- mu - sigma^2/2  # Mean of log returns under GBM
  -sum(dnorm(log_returns, mean = mean_lr, sd = sigma, log = TRUE))
}
# Initial parameters from closed-form estimates
start_params <- list(mu = 1e-04, sigma = 1e-05)
# Perform MLE with lower bound for sigma to ensure positivity
gbm_fit <- mle(minuslogl = NLL, start = start_params, method = "L-BFGS-B", lower = c(-Inf,
1e-05))
# Display results
summary(gbm_fit)Maximum likelihood estimation
Call:
mle(minuslogl = NLL, start = start_params, method = "L-BFGS-B",
lower = c(-Inf, 1e-05))
Coefficients:
Estimate Std. Error
mu 0.001216972 0.0003226681
sigma 0.017730352 0.0002255947
-2 log L: -15798.56
Show code
mu_est <- coef(gbm_fit)["mu"]
sigma_est <- coef(gbm_fit)["sigma"]
mean_fit <- mu_est - sigma_est^2/2 # Fitted mean of log returns
hist(log_returns, breaks = 50, probability = TRUE, main = "Log Returns vs Fitted Normal Density",
xlab = "Log Returns", col = "lightblue")
curve(dnorm(x, mean = mean_fit, sd = sigma_est), col = "red", lwd = 2, add = TRUE)
legend("topright", legend = "Fitted Density", col = "red", lwd = 2)Show code
qqnorm(log_returns, main = "Q-Q Plot of Log Returns")
qqline(log_returns, distribution = function(p) qnorm(p, mean = mean_fit, sd = sigma_est),
col = "red", lwd = 2)Show code
# Kolmogorov-Smirnov Test
ks.test(log_returns, "pnorm", mean = mean_fit, sd = sigma_est)
Asymptotic one-sample Kolmogorov-Smirnov test
data: log_returns
D = 0.075107, p-value = 3.189e-15
alternative hypothesis: two-sided
MLE Assumes we know the exact distribution of the data.
Commmon question: Why are fat tails important?
Fat tails capture extreme events (crashes,bubbles) that the normal distribution misses. Ignoring fat tails leads to underestimating risk
4.2 Quasi-MLE quasi-maximum likelihood estimation
From the package yuima
Allows for the possibility that the model might not be fully correct. It maximizes a simplified approximation log-likelihood function instead. Therefore it’s a lesser alternative of the MLE
Also can be used when the actual likelihood is complex or misspecified.
Even though QMLE may not be as efficient as MLE (because it might lose some information), it still gives consistent and asymptotically normal estimates, meaning that as the sample size grows, the estimates become reliable.
Similar to MLE but allows for possible model misspecification.
Uses a simpler version of the likelihood function (called a quasi-likelihood function).
Still gives consistent and asymptotically normal estimates, but they may be slightly less efficient than MLE.
set up from the yuima package \(dX_t = −θ_2X_tdt + θ_1dW_t\) with \(θ_1 = 0.3\) and \(θ_2 = 0.1\).
Show code
theta1 <- 0.3
theta2 <- 0.1
ymodel <- setModel(drift = c("(-1)*theta2*x"), diffusion = matrix(c("theta1"), 1,
1))
ymodel
Diffusion process
Number of equations: 1
Number of Wiener noises: 1
Parametric model with 2 parameters
Show code
n <- 100
ysamp <- setSampling(Terminal = (n)^(1/3), n = n)
yuima <- setYuima(model = ymodel, sampling = ysamp)
set.seed(123)
yuima <- simulate(yuima, xinit = 1, true.parameter = list(theta1 = theta1, theta2 = theta2))
yuima
Diffusion process
Number of equations: 1
Number of Wiener noises: 1
Parametric model with 2 parameters
Number of original time series: 1
length = 101, time range [0 ; 4.64158883361278]
Number of zoo time series: 1
length time.min time.max delta
Series 1 101 0 4.642 0.04641589
Show code
mle1 <- qmle(yuima, start = list(theta1 = 0.8, theta2 = 0.7), lower = list(theta1 = 0.05,
theta2 = 0.05), upper = list(theta1 = 0.5, theta2 = 0.5), method = "L-BFGS-B")
coef(mle1) theta1 theta2
0.27310660 0.05007788
Show code
summary(mle1)Quasi-Maximum likelihood estimation
Call:
qmle(yuima = yuima, start = list(theta1 = 0.8, theta2 = 0.7),
method = "L-BFGS-B", lower = list(theta1 = 0.05, theta2 = 0.05),
upper = list(theta1 = 0.5, theta2 = 0.5))
Coefficients:
Estimate Std. Error
theta1 0.27310660 0.01927869
theta2 0.05007788 0.13443093
-2 log L: -282.6796
QMLE fails because the true model is not gBm, it looks somewhat alike but the fit is worse due to the simplification of the log likelihood function
Show code
ymodel <- setModel(drift = "mu*x", diffusion = "sigma*x")
yuima <- setYuima(model = ymodel, data = setData(S), sampling = setSampling(delta = Delta,
n = length(S)))
qmle <- qmle(yuima, start = list(mu = 0.05, sigma = 0.125), lower = list(mu = 0.025,
sigma = 0.05), upper = list(mu = 0.7, sigma = 0.5), method = "L-BFGS-B")
coef(qmle) sigma mu
0.05007788 0.02507220
Show code
summary(qmle)Quasi-Maximum likelihood estimation
Call:
qmle(yuima = yuima, start = list(mu = 0.05, sigma = 0.125), method = "L-BFGS-B",
lower = list(mu = 0.025, sigma = 0.05), upper = list(mu = 0.7,
sigma = 0.5))
Coefficients:
Estimate Std. Error
sigma 0.05007788 0.0031731550
mu 0.02507220 0.0009112604
-2 log L: 9311.73
Show code
logLik(qmle)'log Lik.' -4655.865 (df=2)
Show code
check_acc(data1 = sort(as.numeric(LR_S)), data2 = sort(rnorm(length(LR_S), mean = coef(qmle)[1],
sd = coef(qmle)[2])), title = "Sorted values to confront distributional assumptions")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Show code
data.frame(LR_sort = as.numeric(x), QMLE = rnorm(length(x), mean = coef(qmle)[1],
coef(qmle)[2])) %>%
ggplot() + geom_histogram(aes(x = LR_sort, fill = "Log returns"), alpha = 0.8,
bins = 80) + geom_histogram(aes(x = QMLE, fill = "QMLE"), alpha = 0.8, bins = 80) +
labs(title = "Distribution charts of our real data and the one that we", x = NULL) +
scale_fill_manual(name = NULL, values = c(`Log returns` = "indianred3", QMLE = "palegreen")) +
theme(legend.position = "bottom")Using cpoint I was able to find the change during the COVID pandemic, and I am using data from the change-point onwards. Visual tests of the fit: in the density plot and QQ-plot the tails differ, meaning the returns do not follow a normal distribution. The AIC (seen later) gives a numerical value, which is a better option; but if you are unable to do parameter estimation using MLE, you cannot get the log-likelihood and therefore the AIC.
\[ AIC = −2l_n(\hatθ^{(ML)}_n)+ 2dim(\Theta) \]
It is a tug of war between the performance of the model and the number of parameters: it prevents overfitting by rewarding just enough parameters and penalising unnecessary ones. You can always improve the raw fit by adding more parameters, which is why the penalty term is needed.
Show code
S <- Cl(getSymbols("AAPL", from = "2020-01-24", to = "2022-01-01", auto.assign = F))
X <- na.omit(diff(log(S)))
gghistogram(X, add.normal = T)
plot(density(X), lwd = 2, main = "Apple stock Density Plot")
f <- function(u) dnorm(u, mean = mean(X), sd = sd(X))
curve(f, -0.1, 0.1, add = TRUE, col = "red", lwd = 2)
gg_qq_plot(as.numeric(X), title = "APPLE Stock")
qqnorm(X, main = "Apple stock QQ plot")
qqline(X, col = "red", lwd = 2)4.3 Method of moments
- \(µ\) is the real mean \(E(X)\)
- \(σ\) is the real variance \(Var(X)\)
- \(X\) is the sample mean
- \(S^2\) is the sample variance
MLE needs a closed-form likelihood; when there is no closed-form solution — for distributions that are too convoluted — you need the method of moments.
The advantage is that it can always be calculated; however, it is not as accurate.
When the parameters match with the 2 moments you can just calculate them basically as is, however in more complex distributions where this is not the case you need more complex processes Like with LEVY processes, with jump component, it has also a stochastic drift + jump component
flowchart LR A(MLE) --> B(QMLE) --> C(MoM)
Computing the sample moments
\[ \begin{cases} E(X) = \frac\alpha\beta\\ Var(X) = \frac\alpha{\beta^2} \end{cases} \]
\[ α = \frac{[E(X)]^2}{Var (X)} \ \ \ \text{and} \ \ \ β =\frac{E(X)}{Var(X)} \]
Show code
# Sample moments of the log returns, reused by the method-of-moments fits below
x <- mean(as.numeric(LR_S)) # first moment: sample mean
y <- var(as.numeric(LR_S)) # second central moment: sample variance
data.frame(EX = x, VarX = y)| EX | VarX |
|---|---|
| 0.0010416 | 0.0003131 |
Show code
# Method-of-moments estimators for the Gamma distribution:
# shape alpha = E(X)^2 / Var(X), rate beta = E(X) / Var(X).
alpha <- x^2/y
beta <- x/y
data.frame(alpha, beta)| alpha | beta |
|---|---|
| 0.0034645 | 3.326238 |
Show code
# Estimation of the historical (physical) mean and volatility of the GBM via
# Method of Moments on the log returns.
# NOTE(review): Delta_v is defined but never used below, so alpha.hat and
# sigma.hat remain on the per-observation (daily) scale — confirm whether an
# annualisation by 1/Delta_v was intended.
Delta_v <- 1/252
alpha.hat <- mean(LR_S, na.rm = TRUE)
sigma.hat <- sqrt(var(LR_S, na.rm = TRUE))
# Drift correction implied by GBM log-return mean: mu = alpha + sigma^2/2
mu.hat <- alpha.hat + 0.5 * sigma.hat^2
data.frame(sigma.hat = as.numeric(sigma.hat), mu.hat = as.numeric(mu.hat))| sigma.hat | mu.hat |
|---|---|
| 0.0176957 | 0.0011981 |
Show code
check_acc(data1 = sort(as.numeric(LR_S)), data2 = sort(rnorm(length(LR_S), mean = mu.hat,
sd = sigma.hat)), title = "Sorted values to confront distributional assumptions")Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Show code
# Overlay histograms of the sorted observed log returns and a sorted normal
# sample generated with the method-of-moments estimates. NOTE(review): the
# column is named MLE but mu.hat/sigma.hat above are the MoM estimates.
data.frame(LR_sort = sort(as.numeric(LR_S)), MLE = sort(rnorm(length(LR_S), mean = mu.hat,
sigma.hat))) %>%
ggplot() + geom_histogram(aes(x = LR_sort, fill = "Log returns"), alpha = 0.8,
bins = 80) + geom_histogram(aes(x = MLE, fill = "MOM"), alpha = 0.8, bins = 80) +
labs(title = "Distribution charts of our real data and MoM", x = NULL) + scale_fill_manual(name = NULL,
values = c(`Log returns` = "indianred3", MOM = "#fea966")) + theme(legend.position = "bottom")Common question: Advantages and disadvantages of MoM?
The advantage of the method of moments is that even when you do not have a closed-form density function (too convoluted, or no closed form at all) you still have the moments of the distribution. The disadvantage is that you are not using the whole distribution, so the parameter estimates will not be as accurate as MLE, which does use the whole distribution.
No density needed—works with only moment conditions.
Mle is better because it takes all the distribution MoM is only to be used when density function is not known in closed form or doesn’t exist and therefore not as robust QMLE is simplifies the MLE process and therefore it’s a lesser alternative of the MLE
Commmon question: What is AIC and when do we use it?
The aim is to try to identify the underlying continuous model on the basis of discrete observations using AIC (Akaike Information Criterion) statistics defined by Akaike (1973, 1974) \[ AIC = −2 * ln\Big(\hatθ^{ML}_n\Big)+ 2\text {dim}(Θ) \]
\(ln\Big(\hatθ^{ML}_n\Big)\): Log likelihood from fitting process
\(2\text {dim}(Θ)\): 2 * number of parameters used
When comparing several models for a given dataset, the model such that the AIC is lower is preferred.
Common question: Why does AIC only work when MLE is used?
Because AIC depends directly on the log-likelihood, and this is only maximized in the context of MLE. If you estimate parameters with Method of Moments (MoM), there’s no valid likelihood to compute.
5 European options
5.1 Scheme to option pricing
- Download the time series of the underlying asset, polish it up from NAs and from biased data (Change point detection)
- Choose a distributional assumption/stochastic process which you think would fit well on your data
- Do parameter estimation for that chosen distributional assumption
- Do some tests of fitting: if the chosen distributional assumption fits well move to step 5, if it doesn’t go back to Step 2 and try some other distributional assumption, estimate the parameters and do tests of fitting till you find a distributional assumption that fits well on your data;
- Run your simulations and risk-neutralize before or after running your simulations depending on the risk-neutralization method
- Apply your boundary conditions and price the option.
5.2 Risk neutralization
Starting from the concept of risk neutrality with all instruments and all values being priced in the same way in order to not have an advantage based on the positions taken using \(Q\) like in \(E_Q\) to say that you are in the risk neutral risk space
starting from the risk neutral call price you can get back the risk neutral stock price path calculation
like with the martingale property of markovian processes \(E(X_{n+1}|X_1, ..., X_n) = X_n \rightarrow S_0 = e^{-rT}E_Q[S_T]\) with the discounted process being a martingale process
This requirement, that the discounted price process be a martingale under the risk-neutral measure, is connected to the Girsanov theorem (named after Igor Vladimirovich Girsanov).
From B&S we seen that it starts from the SDE (slide 20 stoch processes) to a closed solution in the process you get rid of \(\mu\) to \(r\) as part of the risk neutralization procedures
\[ S_0 = e^{-rT}E_Q[S_T] = e^{-rT}\, S_0\, E_Q\!\left[\exp\{\mu T + \sigma W_T\}\right] \]
This sde is not a martingale bc there is a drift component if \(\mu \neq 0\)
To risk-neutralise a GbM we can just keep \(σ\), but substitute the parameter \(µ\) with the risk-free rate \(r\), therefore the risk-neutral parameters are \((r, σ)\)
substitute \(\mu\) with the riskfree rate \(r\)
Meaning that black and scholes follows the Girsanov property
Commmon question: Why risk neutralize?
To ensure arbitrage-free pricing of financial derivatives, we must work in a risk-neutral framework—where prices are derived without dependence on individual risk preferences.
No-Arbitrage Condition: In efficient markets, arbitrage opportunities cannot persist. Prices must adjust to eliminate them.
5.2.1 Risk neutralization methods
Two main methods: the Esscher transformation, an "a priori" risk neutralization, and the mean-correcting martingale, an "a posteriori" one that is easier to use — you can simulate first and then risk-neutralize at the end.
There isnt a best one, you need to try them and check them witht the market prices
Some of the easiest are the mean correcting martingale and the Esscher transformation
Esscher transformation
An a priori risk neutralization method: take the physical parameters and insert them into a specific formula
\[ f_θ(x) =\frac{exp(θx)f(x)}{\int_R exp(θx)f(x)dx} \]
f(x) is our distributional assumption; we tilt it by \(\exp(\theta x)\), trying a range of candidate values of \(\theta\) to make sure that the martingale (Girsanov) condition is fulfilled.
Mean correcting martingale
a posteriori risk neutralization method estimate the parameters and simulate the matrix of simulations using the phisical values, then put the matrix into the funky formula
\[ S^{RN}_{M,N} = S_0 \, \exp(S_{M,N}) \, \frac{\exp(rt)}{E\big[\exp(S_{\cdot,N})\big]} \]
matrix of simulation with sde.sim at the numerator times(/divided) S_0 and \(e^{rt}\) divided by the same matrix’s expectations for every time point (/line in the matrix) by using the mean, creating a vector of N+1 elements
Show code
# Parameters
S0 <- 100 # Initial stock price
mu <- 0.05 # Drift
sigma <- 0.2 # Volatility
Time <- 1 # Time horizon (years)
n <- 252 # Number of time steps
M <- 100 # Number of simulated paths
r <- 0.03 # Risk-free rate
# Simulate GBM paths under the physical measure (matrix: n + 1 rows, M cols)
S_paths <- sde.sim(X0 = S0, model = "BS", theta = c(mu, sigma), T = Time, N = n, M = M)
# Cross-sectional mean price at each time step (length n + 1)
E_S_N1 <- apply(S_paths, 1, mean)
# Mean-Correcting Martingale (MCM): rescale each time slice so that
# E[S_RN(t)] = S0 * exp(r * t).
# BUG FIX: the original used `1:N` with `N` undefined in this chunk (the step
# count here is `n`), and its length did not match the n + 1 rows of S_paths;
# the correct time grid is 0:n.
S_RN_MCM <- S0 * S_paths * exp(r * (0:n) * (Time/n)) / E_S_N1
grid.arrange(top = paste("Simulated Stock Price Paths before and after MCM\n Showing", min(30, M), "simulations"), ncol =2,
quickplot(S_paths[,1:min(30, M)], show_legend = F, title = "BEFORE"),
quickplot(S_RN_MCM[,1:min(30, M)], show_legend = F, title = " AFTER", ylab = NULL))Show code
# Overlay the first min(30, M) physical GBM paths with their MCM-adjusted
# counterparts. melt() reshapes each price matrix to long format: Var1 is the
# time index, Var2 the path id used to group the lines.
ggplot() +
geom_line(data = melt(S_paths[,1:min(30, M)]), aes(x = Var1, y = value, color = "GBM Paths", group = Var2),
linewidth = 1) +
geom_line(data = melt(S_RN_MCM[,1:min(30, M)]), aes(x = Var1, y = value, color = "MCM Paths", group = Var2),
linewidth = 1) +
labs(title = "Stock Price Paths vs. Mean-Correcting Martingale Adjusted Paths",
subtitle = paste("Showing", min(30, M), "simulations"),
x = "Time (Days)", y = "Stock Price") +
scale_color_manual(values = c("#131cce", "red")) +
theme(legend.title = element_blank(), legend.position = "bottom")Show code
# Side-by-side boxplots of the per-time-step price distribution, before and
# after the mean-correcting martingale adjustment. Each matrix is transposed
# and melted so that every column (time step) becomes one boxplot.
grid.arrange(top = "Boxplot to compare how the distribution changes across time steps", ncol=2,
melt(data.frame(t(as.data.frame(S_paths)))) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(fill = "#131cce", color = "navyblue") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Before", x = NULL, y = NULL)+
theme(
axis.text.x = element_blank(),
panel.grid.minor.x = element_blank(), # Remove minor x-axis grid lines
panel.grid.major.x = element_blank()) # Remove major x-axis grid lines too
,
melt(data.frame(t(as.data.frame(S_RN_MCM)))) %>%
ggplot(aes(x = variable, y = value)) +
geom_boxplot(fill = "red", color = "darkred") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "After", x = NULL, y = NULL) +
theme(
axis.text.x = element_blank(),
panel.grid.minor.x = element_blank(), # Remove minor x-axis grid lines
panel.grid.major.x = element_blank() # Remove major x-axis grid lines too
)
)No id variables; using all as measure variables
No id variables; using all as measure variables
Show code
# Set seed for reproducibility
set.seed(42)
# Parameters
# NOTE(review): `T <- 1` shadows the logical shortcut T (TRUE) in this scope —
# prefer a name like T_total; also the "(daily)" comment does not match 20
# steps over one year — confirm the intended granularity.
S0 <- 100 # Initial stock price
r <- 0.05 # Risk-free rate
T <- 1 # Total time (1 year)
N_steps <- 20 # Number of time steps (daily)
N_paths <- 10 # Number of simulated paths
# Simulate log-prices: i.i.d. N(0, 0.01) increments, one row per path
log_S_paths <- matrix(rnorm(N_paths * N_steps, mean = 0, sd = 0.01), nrow = N_paths,
ncol = N_steps)
log_S_paths <- t(apply(log_S_paths, 1, cumsum)) # Cumulative sum (log-price)
# Convert to actual prices
S_paths <- S0 * exp(log_S_paths)
# Time grid
dt <- T/N_steps
t_grid <- seq(dt, T, by = dt)
mean_correcting_martingale <- function(S_paths, S0, r, t_grid) {
N_paths <- nrow(S_paths)
N_steps <- ncol(S_paths)
S_RN <- matrix(0, nrow = N_paths, ncol = N_steps)
for (t in 1:N_steps) {
# Expected value of S_{t+1} (if t < N_steps)
if (t < N_steps) {
E_S <- mean(S_paths[, t + 1])
} else {
# If at last step, approximate E[S_{t+1}] as S_t * exp(r * dt)
E_S <- mean(S_paths[, t]) * exp(r * dt)
}
# Apply martingale correction
S_RN[, t] <- S0 * S_paths[, t] * exp(r * t_grid[t])/E_S
}
return(S_RN)
}
# Apply the mean-correcting martingale adjustment to the simulated paths
S_RN <- mean_correcting_martingale(S_paths, S0, r, t_grid)
# Verify martingale property: E[S_RN[, t]] should ≈ S0 * exp(r * t) for (t in
# 1:N_steps) { cat( 't =', round(t_grid[t], 3), '| E[S_RN] =',
# round(mean(S_RN[, t]), 2), '| S0 * exp(r*t) =', round(S0 * exp(r *
# t_grid[t]), 2), '\n' ) }5.3 B&S pricing
No need for boundary conditions in the case of a plain vanilla option
Show code
# CALL formula
# Black-Scholes price of a European call option.
# x = spot price, Time = valuation date, T_mat = maturity, r = risk-free
# rate, sigma = volatility, K = strike. Returns the call premium.
call.price <- function(x = 1, Time = 0, T_mat = 1, r = 1, sigma = 1, K = 1) {
    tau <- T_mat - Time              # time to maturity
    vol_sqrt_tau <- sigma * sqrt(tau)
    d2 <- (log(x/K) + (r - 0.5 * sigma^2) * tau)/vol_sqrt_tau
    d1 <- d2 + vol_sqrt_tau
    x * pnorm(d1) - K * exp(-r * tau) * pnorm(d2)
}
# PUT formula
# Black-Scholes price of a European put option.
# x = spot price, Time = valuation date, T_mat = maturity, r = risk-free
# rate, sigma = volatility, K = strike. Returns the put premium.
put.price <- function(x = 1, Time = 0, T_mat = 1, r = 1, sigma = 1, K = 1) {
    tau <- T_mat - Time              # time to maturity
    vol_sqrt_tau <- sigma * sqrt(tau)
    d2 <- (log(x/K) + (r - 0.5 * sigma^2) * tau)/vol_sqrt_tau
    d1 <- d2 + vol_sqrt_tau
    K * exp(-r * tau) * pnorm(-d2) - x * pnorm(-d1)
}
# Worked example: 3-month out-of-the-money call (spot 100, strike 110)
S0 <- 100 # spot price
K <- 110 # strike
r <- 0.05 # risk-free rate
Time <- 1/4 # maturity in years (3 months)
sigma <- 0.25 # volatility
C <- call.price(x = S0, Time = 0, T_mat = Time, r = r, K = K, sigma = sigma)
cat("Call price according to Black and Scholes", C, "\n")Call price according to Black and Scholes 1.980506
Show code
# Put priced directly with the closed-form Black-Scholes formula
P <- put.price(x = S0, Time = 0, T_mat = Time, r = r, K = K, sigma = sigma)
cat("Put price according to Black and Scholes", P, "\n")Put price according to Black and Scholes 10.61406
# Put-Call parity: P = C - S0 + K * exp(-r * T); must match put.price above
P2 <- C - S0 + K * exp(-r * Time)
cat("Put price according to Put call parity", P2, "\n")Put price according to Put call parity 10.61406
5.4 Montecarlo Pricing
The MCPrice function implements Monte Carlo pricing for European-style options using antithetic variates to reduce variance. Specifically, it prices a European call or put option based on the payoff function f(x), where x represents the final stock price at maturity.
Stock Price Simulation: It simulates terminal stock prices under the Black-Scholes model \[ S_T=S_0 \cdot EXP\left\{(r− \frac12 σ^2)(T−t)+σ\sqrt{T−t}Z\right\} \]
It also generates antithetic variates (i.e., both \(u\) and \(-u\)), which improves efficiency by reducing variance
Payoff Computation The fun f(xx) represents the option payoff, meaning you can define different options: For a European call option: f <- fun(x) max(0, x - K) For a European put option: f <- fun(x) max(0, K - x)
Discounting the Expected Payoff: The expected payoff is discounted using the risk-free rate \(e^{−r(T−t)}\)
here we are also doing antithetical sampling as a form of variance reduction. taking half of the simulations m from from rnorm(m / 2) and the other half from - rnorm(m / 2)
Show code
# Monte Carlo price of a European option under Black-Scholes dynamics, using
# antithetic variates (each draw u is paired with -u) for variance reduction.
#
# Args:
#   x: spot price. Time: valuation date. T_mat: maturity.
#   r: risk-free rate. sigma: volatility.
#   M: number of simulated terminal prices (M/2 normal draws, mirrored).
#   f: scalar payoff function of the terminal price, e.g.
#      function(s) max(0, s - K) for a call.
#
# Returns: the discounted mean simulated payoff.
MCPrice <- function(x = 1, Time = 0, T_mat = 1, r = 1, sigma = 1, M = 1000, f) {
    tau <- T_mat - Time
    h <- function(m) {
        u <- rnorm(m/2)
        # Terminal prices for u and the antithetic mirror -u; the drift term
        # is invariant, so it is computed once outside the two branches.
        drift <- (r - 0.5 * sigma^2) * tau
        tmp <- c(x * exp(drift + sigma * sqrt(tau) * u),
            x * exp(drift + sigma * sqrt(tau) * (-u)))
        # vapply (not sapply) guarantees a numeric(1) result per payoff
        mean(vapply(tmp, f, numeric(1)))
    }
    p <- h(M)
    # Discount the expected payoff back to the valuation date
    p * exp(-r * tau)
}
# Example
f <- function(x) max(0, x - K)
M <- 1000
MC1 <- MCPrice(x = S0, Time = 0, T_mat = Time, r = r, sigma, M = M, f = f)
cat("Call price according to Monte Carlo", MC1, "after", M, "simulations, with a diffference of",
MC1 - C, "\n")Call price according to Monte Carlo 2.143071 after 1000 simulations, with a diffference of 0.1625648
Show code
M <- 50000
MC2 <- MCPrice(x = S0, Time = 0, T_mat = Time, r = r, sigma, M = M, f = f)
cat("Call price according to Monte Carlo", MC2, "after", M, "simulations, with a diffference of",
MC2 - C, "\n")Call price according to Monte Carlo 2.002983 after 50000 simulations, with a diffference of 0.02247705
Show code
M <- 1e+06
MC3 <- MCPrice(x = S0, Time = 0, T_mat = Time, r = r, sigma, M = M, f = f)
cat("Call price according to Monte Carlo", MC3, "after", M, "simulations, with a diffference of",
MC3 - C, "\n")Call price according to Monte Carlo 1.982009 after 1e+06 simulations, with a diffference of 0.001502341
Show code
# Speed of convergence of the MC estimator: for each sample size in `m`,
# repeat the pricing `repl` times and record the mean price and its spread.
m <- c(10, 50, 100, 150, 200, 250, 500, 1000)
nM <- length(m)
repl <- 100
# Preallocated results (the original grew p1/err with c() inside the loop,
# which copies the whole vector on every append)
p1 <- numeric(nM)   # mean MC price per sample size
err <- numeric(nM)  # std dev of the MC price per sample size
mat <- matrix(NA_real_, repl, nM)
for (k in seq_len(nM)) {
    tmp <- numeric(repl)
    for (i in seq_len(repl)) {
        tmp[i] <- MCPrice(x = S0, Time = 0, T_mat = Time, r = r, sigma, M = m[k],
            f = f)
    }
    mat[, k] <- tmp
    p1[k] <- mean(tmp)
    err[k] <- sd(tmp)
}
colnames(mat) <- m
mat %>%
round(5) %>%
datatable()With the law of large numbers you can see that the values converge to the black and scholes value
Show code
# Convergence funnel in base graphics: blue lines are the MC mean +/- one
# standard deviation per sample size, orange boxplots show the replication
# spread, and the dashed red line is the closed-form B&S price.
p0 <- C
minP <- min(p1 - err)
maxP <- max(p1 + err)
plot(m, p1, type = "n", ylim = c(minP, maxP), axes = F, ylab = "MC price", xlab = "MC replications")
lines(m, p1 + err, col = "blue")
lines(m, p1 - err, col = "blue")
axis(2, p0, "B&S price")
axis(1, m)
boxplot(mat, add = TRUE, at = m, boxwex = 15, col = "orange", axes = F)
points(m, p1, col = "blue", lwd = 3, lty = 3)
abline(h = p0, lty = 2, col = "red", lwd = 3)Show code
# Assuming mat is a matrix: rows = replications, columns = sample sizes Convert
# matrix to long format
mat_long <- as.data.frame(mat)
mat_long$rep <- 1:nrow(mat_long)
mat_long <- pivot_longer(mat_long, cols = -rep, names_to = "m", values_to = "MC_price")
mat_long$m <- as.numeric(gsub("V", "", mat_long$m)) # Convert m to numeric if needed
# Create a summary frame for lines and CI bands
summary_df <- data.frame(m = m, p1 = p1, upper = p1 + err, lower = p1 - err)
summary_df %>%
round(4) %>%
datatable()Show code
# Plot
ggplot(mat_long, aes(x = factor(m), y = MC_price)) + geom_boxplot(fill = "orange",
outlier.shape = 1, width = 0.5) + geom_point(data = summary_df, aes(x = factor(m),
y = p1), color = "blue", size = 2) + geom_line(data = summary_df, aes(x = factor(m),
y = upper, group = 1), color = "blue") + geom_line(data = summary_df, aes(x = factor(m),
y = lower, group = 1), color = "blue") + geom_hline(yintercept = C, linetype = "dashed",
color = "red", size = 1) + labs(x = "MC replications", y = "MC price")5.5 FFT pricing
The characteristic function is a moment generating function evaluated at a complex argument, with \(i=\sqrt{-1}\): \(\varphi(t) = E\{e^{itX}\}\). Starting from the density function, the FFT method moves from the real domain to the imaginary domain; then, using the inverse transform, we return to the real domain.
Its a form of pricing to be used as a last resort, when possible you need to be using montecarlo simulations
VERY Common question: What is the Fast Fourier Transform? When do we use it? How do we improve convergence?
The Fast Fourier Transform (FFT) method is especially useful in option pricing. It relies on the Characteristic function(a transformation of the random variable into the imaginary domain), which is the imaginary domain, so there is a problem where the number is not a real number like with pricing, but you can take the inverse of this function and get it back to the real domain giving you the end price
It can be used if the density function doesn’t exist or is too convoluted and cannot be determined in closed-form Density function is real domain, or is too mathematically complex to work with directly—as is often the case with multi-asset models or models with jumps
A key advantage is that this function always exists, even when the density function does not. Since characteristic functions live in the complex (imaginary) domain, we use an inverse Fourier transform to retrieve results in the real domain, such as option prices. It also have some properties that allows for precise pricing in many advanced models (multi-asset models)
To enhance the numerical stability and convergence speed of this inversion, a Dampening is an “hyper-factor” (usually denoted \(\mu\)) is introduced. This dampening acts as a smoothing parameter (for ensuring that the integral in the inverse transform is well-behaved). A typical value for \(\mu\) in the geometric Brownian motion setting is 1, but it can be adjusted to improve convergence when using FFT.
Show code
# Carr-Madan FFT pricer for a European call.
#
# phi:   characteristic function of the one-period log-return.
# S0, K: spot and strike. r: risk-free rate. Time: maturity in years.
# alpha: dampening factor that makes the Fourier integral well-behaved.
# N:     number of FFT grid points. eta: spacing of the integration grid.
#
# Returns the call price, interpolated at log(K/S0) on the log-strike grid.
FFTcall.price <- function(phi, S0, K, r, Time, alpha = 1, N = 2^12, eta = 0.25) {
    # Mean correction so the discounted price process is a martingale
    m <- r - log(phi(-1i))
    phi.tilde <- function(u) (phi(u) * exp(1i * u * m))^Time
    # Dampened, discounted transform of the call price (Carr-Madan psi)
    psi <- function(v) {
        denom <- alpha^2 + alpha - v^2 + 1i * (2 * alpha + 1) * v
        exp(-r * Time) * phi.tilde(v - (alpha + 1) * 1i)/denom
    }
    lambda <- (2 * pi)/(N * eta)   # log-strike grid spacing
    b <- N * lambda/2              # half-width of the log-strike grid
    log_strikes <- -b + lambda * (0:(N - 1))
    v_grid <- eta * (0:(N - 1))
    # Simpson quadrature weights (1, 4, 2, 4, ..., divided by 3)
    simpson <- (3 + (-1)^(1:N) - ((1:N) - 1 == 0))/3
    integrand <- exp(1i * b * v_grid) * psi(v_grid) * eta * simpson
    transformed <- fft(integrand)
    call_values <- exp(-alpha * log_strikes) * transformed/pi
    # Interpolate the real part at the requested log-moneyness
    spline(log_strikes, Re(call_values), xout = log(K/S0))$y * S0
}
# Black-Scholes characteristic function of the one-period log-return;
# `mu` and `sigma` are read from the enclosing environment.
phiBS <- function(u) {
    exp((0 + (0+1i)) * u * (mu - 0.5 * sigma^2) - 0.5 * sigma^2 * u^2)
}
mu <- 1  # dampening hyper-parameter used in the GBM setting (see text above)
FFT <- FFTcall.price(phiBS, S0 = S0, K = K, r = r, Time = Time)
# BUG FIX: the message said "Monte Carlo" but this is the FFT price
cat("Call price according to FFT", FFT, "with a difference of", FFT - C,
    "\n")Call price according to FFT 1.984243 with a difference of 0.003736918
6 Levy processes
Starting from the limitations of the GbM Paul Levy devised a new kind of stochastic process where spikes are possible
- Distributions are not normal
- correlations converge on one
- markets are not efficient
- investors and traders are deeply flawed
- there is no such thing as a rational investor
- there are many anomalies
Levy processes were introduced as the sum of a jump process and a Brownian motion with drift. The original idea was to construct a family of processes wide enough to comprise a variety of well-known other stochastic processes.
with the summation of 2 different processes \(X_t\) and \(M_t\)
\[ Z_t = X_t + M_t \]
The initial process based on a GbM
\[ X_t = µt + σB_t \ \ \ \ \ \ µ ∈ \mathbb R, \ \ σ ≥ 0 \]
And the second
\[ M_t =\sum ^{N_t}_{i=0}Y_{τi} − λ_t \mathbb E(Y_{τi}) \ \ \ λ ≥ 0 \]
with \(N_t\) an homogenous Poisson process which counts the random number of jumps and \(Y_{τi}\) a sequence of i.i.d. random variables which represent the entity of the jumps
This process has continuous paths between two consecutive random jumps.
A stochastic process \({Zt , 0 ≤ t ≤ T}\), with \(Z0 = 0\) almost surely is called a Levy process if the following properties are satisfied
\(Z_t\) has independent increments, i.e. \(Zt − Zs\) is independent of \(I_s\) , for any \(0 ≤ s < t ≤ T\)
\(Z_t\) has stationary increments, i.e. for any \(0 ≤ s, t ≤ T\) the distribution of \(Zt+s − Zt\) does not depend on \(t\)
\(Z_t\) is stochastically continuous, i.e. for every \(0 ≤ t ≤ T\) and \(ϵ> 0\) we have that \(\lim_{s→t} P(|Zt − Zs | > ϵ) = 0\)
Levy-Khintchine Formula Lévy process is characterized by its characteristic function, which is given by the Lévy–Khintchine formula
\[ \mathbb E [e^{iuX}] = exp\left[ibu − \frac{u^2c}2 + \int _\mathbb R \left(e^{iux} − 1 − iux \boldsymbol 1_{|x|<1}\right) ν(dx)\right] \]
Levy triplets In the above, \(\mathbf {1}\) is the indicator function. Because characteristic functions uniquely determine their underlying probability distributions, each Lévy process is uniquely determined by the “Lévy–Khintchine triplet” \((b,\ c,\ ν)\) or on wikipedia \((a,\sigma ^{2},\Pi)\). The terms of this triplet suggest that a Lévy process can be seen as having three independent components: a linear drift, a Brownian motion, and a Lévy jump process
Any process that has this levy triplets (AKA triplets of levy char) can be a levy process The law PX of a random variable X is infinitely divisible if and only if there exists a triplet \((b,\ c,\ ν)\), such that:
drift term \(b \in \mathbb R\)
diffusion coefficient \(c \in \mathbb R \geq 0\)
Levy measure ν satisfying the property \(\int _\mathbb R (1 ∧ |x|^2) ν(dx) < \infty\)
GbM can be a levy process with levy measure = 0 any sde can be a levy
Commmon question: Are levy processes martingale and markovian?
Submartingale more than supermartingale gamma distribution tails. And a markovian process, still only the last point filtration
Show code
startLV <- Qdate(9, 10, 2017)
ticker <- "TSLA"
S <- Cl(getSymbols(ticker, from = startLV, to = "2024-02-16", auto.assign = F))
cp <- cpoint(as.numeric(S))
bprint(cp)k0 = 728
tau0 = 728
theta1 = 1.758111
theta2 = 9.068365
Show code
ggplot(S, aes(x = index(S), y = S)) + geom_line() + geom_vline(aes(xintercept = startLV +
cp$tau0), color = "red") + labs(title = paste("Price of", ticker, "with changepoint"),
x = "Date", y = "Price")Don't know how to automatically pick scale for object of type <xts/zoo>.
Defaulting to continuous.
Show code
S <- Cl(getSymbols(ticker, from = startLV + cp$tau0, to = "2024-02-16", auto.assign = F))
l_ret <- na.omit(diff(log(S)))Following the scheme for option pricing, I need to take the prices from the change-point onwards: instead of 2017-10-09, I am taking the data from 2019-10-07.
VARIANCE-GAMMA MODEL: using the VarianceGamma package to get the parameters. Common question: What are the different optimization methods?
“BFGS” uses the quasi-Newton method “BFGS” as documented in optim;
“Nelder-Mead” uses an implementation of the Nelder and Mead method as documented in optim, most of the time this is that reaches convergence faster
“nlm” Newton Raphson method uses the nlm function in R.
choosing based on the Log likelyhood, however there is evidence that the Nelder-Mead is the best to achieving convergence
Show code
# Fit the Variance-Gamma parameters (vgC, sigma, theta, nu) to the log
# returns by maximum likelihood (VarianceGamma::vgFit, default Nelder-Mead).
# BUG FIX: removed a stray trailing comma that passed an empty argument.
# NOTE(review): the printed "Convergence code: 1" below means the optimizer
# stopped at its iteration limit without converging — consider raising maxit.
vgfit <- vgFit(l_ret) # estimate VG parameters on the sample
summary(vgfit)
Data:
Parameter estimates:
vgC sigma theta nu
-0.02125 0.04347 0.02553 0.11841
Likelihood: 1937.632
Method: Nelder-Mead
Convergence code: 1
Iterations: 1001
Show code
bprint(vgfit$param)vgC = -0.02124906
sigma = 0.04347108
theta = 0.0255324
nu = 0.1184145
Show code
## Unpack the fitted VG parameters and set the simulation settings.
## NOTE(review): `c <- vg_param[1]` shadows base::c in this environment.
## Calls like c(...) still dispatch to the function, but it is an
## anti-pattern — a name like vgC would be safer.
vg_param <- as.numeric(vgfit$param)
c <- vg_param[1]
sigma <- vg_param[2]
theta <- vg_param[3]
nu <- vg_param[4]
Time <- 1/4 # option maturity = 3 months
N <- 100 # number of steps for each path
r <- 0.01 # arbitrary risk-free rate
nsim <- 100 # number of simulated path
# Variance Gamma function Variance Gamma function
VG <- function(sigma, nu, theta, Time, N, r) {
a <- 1/nu
b <- 1/nu
h <- Time/N
Time <- (0:N) * Time/N
X <- rep(0, N + 1)
I <- rep(0, N)
X[1] <- 0
for (i in 1:N) {
I[i] <- rgamma(1, a * h, b)
X[i + 1] <- X[i] + theta * I[i] + sigma * sqrt(I[i]) * rnorm(1)
}
return(X)
}
# Vectorised Variance-Gamma path simulation: draw all N gamma time-change
# increments and all N standard normals up front, then cumulate the steps.
# Returns a numeric vector of length N + 1 starting at 0.
# Note: `r` is accepted for signature compatibility but is not used.
VG.vec <- function(sigma, nu, theta, Time, N, r) {
    step_len <- Time/N
    # Gamma subordinator increments (mean step_len each), drawn in one call
    subord <- rgamma(N, shape = step_len/nu, scale = nu)
    shocks <- rnorm(N, mean = 0, sd = 1)
    increments <- theta * subord + sigma * sqrt(subord) * shocks
    c(0, cumsum(increments))
}
## Create a matrix to fill with random paths using the function VG just created
VG_paths <- matrix(nrow = nsim, ncol = N + 1)
for (i in 1:nsim) {
VG_paths[i, ] <- VG(sigma, nu, theta, Time = Time, N, r)
}
t(VG_paths) %>%
format(scientific = TRUE, digits = 2) %>%
datatable()Show code
# plot the Monte Carlo Simulation
colori <- viridis(nsim)
plot(VG_paths[1, ], col = 0, type = "l", ylim = c(min(VG_paths), max(VG_paths)),
main = "Monte Carlo Simulation for VG returns", sub = paste(N, "steps", nsim,
"paths"), xlab = "Time", ylab = "VG returns")
for (i in 2:nsim) {
lines(VG_paths[i, ], col = colori[i], lwd = 2)
}Show code
lims <- c(min(VG_paths), max(VG_paths))
grid.arrange(ncol = 2, top = "Monte Carlo Simulation for Variance Gamma returns",
VG_paths %>%
t() %>%
quickplot(subtitle = paste("All", nsim, "simulations"), show_legend = F) +
ylim(lims), quickplot(apply(VG_paths, 2, mean), subtitle = "Mean across timesteps",
show_legend = F) + ylim(lims))6.1 Testing basic levy
Following the scheme for option pricing, we perform tests of fitting using the QQ-plot and the density polygon.
Show code
## TESTS (both graphical and not) OF DISTRIBUTIONAL ASSUMPTIONS QQplot
l_ret.s <- sort(as.numeric(l_ret)) # sort the log returns
p <- ppoints(length(l_ret.s)) # plotting position
VG.q <- qvg(p, vgC = c, sigma = sigma, theta = theta, nu = nu) # compute the quantile
plot(x = VG.q, y = l_ret.s, main = "Variance-Gamma Q-Q Plot", xlab = "Theoretical Quantiles",
ylab = "Sample Quantiles")
abline(0, 1, col = "red") # Add reference lineShow code
ggplot(data.frame(VG.q), aes(sample = as.numeric(VG.q))) + stat_qq(color = "blue") +
stat_qq_line(color = "black", size = 1) + labs(title = "QQ Plot of VG", x = "Theoretical Quantiles",
y = "Sample Quantiles") + theme(plot.subtitle = element_text(hjust = 0.5))Show code
# good result, linear
# Compare the kernel density estimate of `data` with a Variance-Gamma
# reference density over the same support.
#
# Args:
#   data:  numeric vector (e.g. log returns).
#   title: plot title. kde_color / vg_color: line colours for the two curves.
# Returns: a ggplot object.
plot_density_comparison <- function(data, title = "Density Comparison", kde_color = "coral2",
vg_color = "seagreen3") {
# Compute Kernel Density Estimate (KDE)
kde_data <- density(data)
kde_df <- data.frame(x = kde_data$x, y = kde_data$y)
# Compute Variance Gamma (VG) density
# NOTE(review): mean(data) and sd(data) are passed positionally, i.e. as
# dvg's vgC and sigma, leaving theta/nu at defaults — this is not the fitted
# VG density (vgfit$param is available); confirm whether that was intended.
x_vals <- seq(min(data), max(data), length.out = 500)
vg_y_vals <- dvg(x_vals, mean(data), sd(data))
vg_df <- data.frame(x = x_vals, y = vg_y_vals)
# Plot using ggplot2
ggplot() + geom_line(data = kde_df, aes(x = x, y = y, color = "Kernel Density"),
linewidth = 1) + geom_line(data = vg_df, aes(x = x, y = y, color = "Variance Gamma"),
linewidth = 1) + scale_color_manual(values = c(`Kernel Density` = kde_color,
`Variance Gamma` = vg_color)) + labs(x = "", y = "", title = title, color = "Density Type") +
theme_minimal()
}
# Example usage
plot_density_comparison(l_ret, title = "My Custom Density Plot")H0 = The data is consistent with a specified reference distribution. H1 = The data is NOT consistent with a specified reference distribution
AIC doesnt use \(c / vgC\) as paramter
Commmon question: Why do we use the Kolmogorov-Smirnov test?
assumption of normality
Show code
# Chi^2 test
test <- chisq.test(l_ret.s, VG.q)
test
Pearson's Chi-squared test
data: l_ret.s and VG.q
X-squared = 1202312, df = 1201216, p-value = 0.2397
Show code
paste("With a Chi^2 test: high p-value (0.24)")[1] "With a Chi^2 test: high p-value (0.24)"
Show code
# BUG FIX: the branches were inverted (a SMALL p-value rejects H0, a large
# one does not), the 0.24 cutoff was tuned to the observed p-value instead of
# a conventional significance level, and the else-branch rounded the whole
# htest object (an error) instead of test$p.value.
ifelse(test$p.value < 0.05, paste("We reject the null hypothesis as the p-value is",
    round(test$p.value, 4)), paste("We can't reject the null hypothesis as the p-value is",
    round(test$p.value, 4)))[1] "We can't reject the null hypothesis as the p-value is 0.2397"
Show code
# K-S test
test <- ks.test(as.numeric(l_ret), rvg(length(as.numeric(l_ret)), param = c(c, sigma,
theta, nu)))
test
Asymptotic two-sample Kolmogorov-Smirnov test
data: as.numeric(l_ret) and rvg(length(as.numeric(l_ret)), param = c(c, sigma, theta, nu))
D = 0.10848, p-value = 4.952e-06
alternative hypothesis: two-sided
Show code
# BUG FIX: 0.10 is the K-S statistic D, not the p-value; the p-value printed
# above is 4.952e-06, which is very low, not high.
paste("With a Kolmogorov-Smirnov test: low p-value (4.95e-06)")[1] "With a Kolmogorov-Smirnov test: low p-value (4.95e-06)"
Show code
# BUG FIX: the branches were inverted — a p-value below the significance
# level REJECTS the null; here p ~ 5e-06, so the distributional assumption
# is rejected by the K-S test.
ifelse(test$p.value < 0.05, paste("We reject the null hypothesis as the p-value is",
    round(test$p.value, 4)), paste("We can't reject the null hypothesis as the p-value is",
    round(test$p.value, 4)))[1] "We reject the null hypothesis as the p-value is 0"
Show code
# Summary statistics through basicStats from fbasics
final_retVG <- VG_paths[, N + 1]
# desc_df(data.frame(final_retVG))
data.frame(t(basicStats(final_retVG)))| nobs | NAs | Minimum | Maximum | X1..Quartile | X3..Quartile | Mean | Median | Sum | SE.Mean | LCL.Mean | UCL.Mean | Variance | Stdev | Skewness | Kurtosis | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| final_retVG | 100 | 0 | -0.053882 | 0.059753 | -0.003604 | 0.022196 | 0.008361 | 0.006858 | 0.836083 | 0.001969 | 0.004454 | 0.012268 | 0.000388 | 0.019689 | -0.010403 | 0.052392 |
Show code
gghistogram(final_retVG, bins = 30, title = "Histogram of last time steps", subtitle = paste0("not much disclosing when nsim is small (now = ",
nsim, ")"))6.2 From variance-gamma returns to stock prices
Before actually thinking of doing option pricing with Levy processes, there are two transformations that need to be performed on our chosen Levy process:
The exponential transformation (because our Levy process starts from zero and could also go below zero): this is necessary for the Levy processes to be able to represent the underlying asset;
Pass from the physical process to the risk neutral one: this is necessary in order to price the option without arbitrage.
So in the end it becomes:
\[ S(t) = S(0)e^{rt+Z_t} \]
\(Z_t\) levy process * some people add the compounding factor of \(rt\) we use it starting from \(S_0\) which is the last price of the timeseries available
Show code
# Risk-free rate assumed for the exponential-VG pricing below
r <- 0.01
S0 <- as.numeric(tail(S, n=1)) # initial price: last observed close of the series
S0[1] 200.45
Show code
# function for stock price with VG returns
# Simulate one exponential Variance-Gamma stock-price path:
#   S_{i+1} = S_i * exp(r*h + theta*I_i + sigma*sqrt(I_i)*Z_i)
# where I_i are gamma time-change increments and Z_i standard normals.
#
# Args: sigma, nu, theta = VG parameters; Time = horizon in years;
#       N = number of steps; r = compounding rate; S0 = initial price.
# Returns: numeric vector of length N + 1 starting at S0.
VGexp <- function(sigma, nu, theta, Time, N, r, S0) {
  a <- 1 / nu
  b <- 1 / nu
  h <- Time / N # step length
  X <- numeric(N + 1)
  I <- numeric(N)
  X[1] <- S0
  for (i in seq_len(N)) {
    I[i] <- rgamma(1, a * h, b) # gamma component for the jump
    # BUG FIX: the original used `r * Time` after Time had been overwritten
    # with the whole time grid (a length-(N+1) vector), which errors on the
    # scalar assignment below; the per-step compounding factor is r * h.
    X[i + 1] <- X[i] * exp(r * h + theta * I[i] + sigma * sqrt(I[i]) * rnorm(1))
  }
  return(X)
}
VGexp_paths <- matrix(nrow = nsim, ncol = N + 1)
for (i in 1:nsim) {
VGexp_paths[i, ] <- VGexp(sigma, nu, theta, Time, N, r, S0)
}
VGexp_paths %>% t() %>% round(4) %>% datatable()Show code
# Plot the simulated exponential-VG price paths.
# BUG FIX: the subtitle was hard-coded ("100 steps, 10 paths") although
# nsim = 100, and the y-axis was mislabelled "S&P 500" (these are TSLA-
# calibrated simulations); also fixed the "Simlation" typo.
plot(VGexp_paths[1, ], col = 0, type = "l",
    ylim = c(min(VGexp_paths), max(VGexp_paths)),
    main = "MC Simulation for VG stock prices",
    sub = paste(N, "steps,", nsim, "paths"),
    xlab = "Time", ylab = "Stock price")
for (i in 2:nsim) {
    lines(VGexp_paths[i, ], col = colori[i], lwd = 2)
}Show code
lims <- c(min(VGexp_paths), max(VGexp_paths))
grid.arrange(
ncol = 2, top = "Monte Carlo Simulation for Variance Gamma stock Levy process",
quickplot(t(VGexp_paths), title = "", subtitle = paste("All",nsim, "simulations"), show_legend = F)+
ylim(lims),
quickplot(apply(VGexp_paths, 2, mean), title = "Mean across timesteps", show_legend = F)+
ylim(lims)
)Show code
## Statistics on final prices
final_pricesVG<-VGexp_paths[,N+1]
# Risk neutral transform MCM
rn_final_pricesVG<-S0*final_pricesVG*(exp(r*Time)/mean(final_pricesVG))
# (S0*VGexp_paths*(exp(r*Time)/apply(VGexp_paths,2, mean))) %>%
# quickplot("Mean correcting martingale of the whole VG stock paths", show_legend = F) ## dont think its correct
# taking all the last 5 gridpoints to see how they change in the last time steps
basicStats(data.frame(
VGexp_paths[, N - 4],
VGexp_paths[, N - 3],
VGexp_paths[, N - 2],
VGexp_paths[, N - 1],
VGexp_paths[, N],
VGexp_paths[, N + 1])) %>%
set_colnames(c("T-5", "T-4", "T-3", "T-2", "T-1", "T")) %>%
round(4) %>%
mutate(Statistics = rownames(basicStats(1)))%>% # just to extract the rownames
relocate(Statistics, .before = everything()) %>% # move this new column
gt() %>%
data_color(alpha = 0.2, columns = 2:7, rows = everything(), autocolor_text = F, direction = "row",
palette = c("red", "white", "green")) %>%
opt_stylize(5, "gray", F) %>%
tab_options(table.font.size = px(11)) %>%
tab_header("Variance gamma model last stock stock prices statistics")| Variance gamma model last stock stock prices statistics | ||||||
| Statistics | T-5 | T-4 | T-3 | T-2 | T-1 | T |
|---|---|---|---|---|---|---|
| nobs | 100.0000 | 100.0000 | 100.0000 | 100.0000 | 100.0000 | 100.0000 |
| NAs | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |
| Minimum | 188.4303 | 188.4303 | 186.7764 | 186.7892 | 186.7892 | 186.7892 |
| Maximum | 214.4349 | 214.4484 | 214.5432 | 214.5432 | 214.5432 | 217.3709 |
| 1. Quartile | 198.3554 | 198.4929 | 198.5642 | 198.4928 | 198.4923 | 198.4923 |
| 3. Quartile | 203.8600 | 203.9916 | 203.9924 | 203.9387 | 203.9394 | 203.9420 |
| Mean | 200.8464 | 200.9176 | 200.9311 | 200.8878 | 200.9012 | 200.8996 |
| Median | 200.6743 | 200.7400 | 200.7400 | 200.6824 | 200.7409 | 200.7409 |
| Sum | 20084.6404 | 20091.7565 | 20093.1141 | 20088.7842 | 20090.1162 | 20089.9556 |
| SE Mean | 0.4285 | 0.4269 | 0.4320 | 0.4324 | 0.4355 | 0.4436 |
| LCL Mean | 199.9961 | 200.0704 | 200.0739 | 200.0299 | 200.0371 | 200.0194 |
| UCL Mean | 201.6967 | 201.7647 | 201.7884 | 201.7458 | 201.7652 | 201.7798 |
| Variance | 18.3651 | 18.2270 | 18.6658 | 18.6947 | 18.9633 | 19.6779 |
| Stdev | 4.2854 | 4.2693 | 4.3204 | 4.3237 | 4.3547 | 4.4360 |
| Skewness | 0.2020 | 0.1672 | 0.0456 | 0.1086 | 0.0747 | 0.2366 |
| Kurtosis | 1.0079 | 1.0447 | 1.3316 | 1.2601 | 1.2576 | 1.8331 |
Show code
grid.arrange(top = "Distribution of last prices", ncol=2,
gghistogram(rn_final_pricesVG, bins = 30)+ labs(subtitle = "After MCM"),
gghistogram(final_pricesVG, bins = 30, fill = "firebrick")+ labs(subtitle = "before MCM"))Show code
##OPTION PRICING
K <- S0 #prova: opzione ATM
payoff_VG <- pmax(rn_final_pricesVG - K, 0)
optprice_VG <- mean(payoff_VG)*exp(-r*Time)
optprice_VG[1] 1.920265
6.2.1 Pricing with FFT under the VG process
Show code
# VG process
theta <- -0.1436
nu <- 0.3
r <- 0.1
sigma <- 0.12136
Time <- 1/4
K <- 101
S <- 100
alpha <- 1.65
# Characteristic function of the variance-gamma log-price (unit time).
# Reads the VG parameters (theta, nu, sigma), the rate r and the spot S0
# from the enclosing environment, exactly as the surrounding script sets them.
phiVG <- function(u) {
  # martingale (mean-correction) term omega
  omega <- log(1 - theta * nu - sigma^2 * nu / 2) / nu
  # VG characteristic component: (1 - i*theta*nu*u + sigma^2*nu*u^2/2)^(-1/nu)
  vg_part <- (1 - 1i * theta * nu * u + 0.5 * sigma^2 * u^2 * nu)^(-1 / nu)
  exp(1i * u * (log(S0) + r + omega)) * vg_part
}
# Carr-Madan FFT pricer for a European call.
#
# phi    characteristic function of the unit-time log-price
# S0, K  spot and strike
# r      risk-free rate
# Time   maturity in years
# alpha  damping factor of the Carr-Madan transform
# N      number of FFT nodes
# eta    spacing of the integration grid
#
# Returns the call price interpolated at log-moneyness log(K/S0).
FFTcall.price <- function(phi, S0, K, r, Time, alpha = 1, N = 2^12, eta = 0.25) {
  # drift correction so that the discounted price is a martingale
  m <- r - log(phi(-1i))
  phi.tilde <- function(u) (phi(u) * exp(1i * u * m))^Time
  # damped call transform (Carr-Madan psi)
  psi <- function(v) {
    numer <- exp(-r * Time) * phi.tilde(v - (alpha + 1) * 1i)
    denom <- alpha^2 + alpha - v^2 + 1i * (2 * alpha + 1) * v
    numer / denom
  }
  lambda <- (2 * pi) / (N * eta)     # log-strike spacing
  b <- 1/2 * N * lambda              # half-width of the log-strike grid
  ku <- -b + lambda * (0:(N - 1))    # log-strike grid
  v <- eta * (0:(N - 1))             # integration grid
  # Simpson-rule weights folded into the integrand
  simpson <- (3 + (-1)^(1:N) - ((1:N) - 1 == 0)) / 3
  integrand <- exp(1i * b * v) * psi(v) * eta * simpson
  ft <- fft(integrand)
  prices <- exp(-alpha * ku) * ft / pi
  # interpolate the real part at the requested log-moneyness
  inter <- spline(ku, Re(prices), xout = log(K / S0))
  return(inter$y * S0)
}
FFTcall.price(phiVG, S0 = S0, K = K, r = r, Time = Time)[1] 101.4806
Show code
black_scholes(S = S0, K = S0, Time = Time, r = r, sigma = sigma, type = "call")[1] 7.667262
6.3 Meixner model
jump components driven now by the meixner distribution:
- a (scale)
- b (skewness)
- d (tail heaviness)
- m (location)
Common question: how do we estimate the Meixner model?
It has 4 parameters, but the Lévy process is never a perfect martingale and the density function is too convoluted for stable maximum-likelihood estimation (the results will not be stable), so we have to settle for the method of moments (MoM).
Lévy Process: Independent/stationary increments, starts at zero.
Markovian: Future values depend only on the current state.
Submartingale Behavior: Typically drifts upward due to dominant positive jumps (though drift-jump balance can vary).
Properties
It has independent and stationary increments, and starts at zero.
Markovian: Like all Lévy processes, the Meixner process is Markovian—future values depend only on the current state, not on the past trajectory.
Submartingale Behavior: In practice, the jump component often dominates the drift. Since the negative drift is usually smaller in magnitude than the positive jumps, the process tends to behave as a submartingale (i.e., it drifts upward on average), although this can vary depending on the data.
since we are using MoM you get less efficient estimates than MLE since MoM ignores full distributional information, Cannot use AIC/BIC for model comparison (no likelihood function).
\[ f(x; a, b, d, m) = \frac{(2 \cos(b/2))^{2d}}{2a\pi\Gamma(2d)} \exp\left( \frac{b(x - m)}{a} \right) \Gamma\left( d + \frac{i(x - m)}{a} \right)^{-1} \]
where \(a>0\), \(-\pi<b<\pi\), \(d>0\), and \(m \in \mathbb{R}\).
Show code
# Moments: mean, variance, skewness, kurtosis
x <- mean(l_ret, na.rm = TRUE)
y <- sd(l_ret, na.rm = TRUE)
z <- as.numeric(skewness(l_ret, na.rm = TRUE))
w <- as.numeric(kurtosis(l_ret, na.rm = TRUE))
# Mom: estimates parameters m, a, b, d as functions of the moments
m <- x - ((z * sqrt(y))/(w - (z^2) - 3))
a <- sqrt(y * (2 * w - 3 * (z^2) - 6))
b <- 2 * atan(-sqrt((z^2)/(2 * w - 3 * (z^2) - 6)))
d <- 1/(w - (z^2) - 3)
# risk neutral transformation Esscher transform: Meixner(a, a*theta + b, d, m)
# distribution
# theta <- -1/a * (b + 2 * atan((-cos(a/2)+ exp((m-r)/2*d))/sin(a/2))) b <-
# a*theta+b
# mean correction
# m <- r -2 *d*log(cos(b/2)/cos((a+b)/2))
data.frame(mean = x, sd = y, skew = z, kurt = w)| mean | sd | skew | kurt |
|---|---|---|---|
| 0.0023131 | 0.0420622 | -0.2178853 | 3.404312 |
Show code
data.frame(a = a, b = b, d = d, m = m)| a | b | d | m |
|---|---|---|---|
| 0.1673974 | -0.5217283 | 2.802395 | 0.1275416 |
Show code
# Meixner function
# Draw one vector of Meixner-distributed returns by numerical inversion.
#
# a, b, d, M  Meixner parameters: scale, skewness, tail heaviness, location
# N           number of draws / time steps
#
# Uses Runuran: udmeixner() builds the distribution object, pinvd.new()
# fits a polynomial interpolation of the inverse CDF, ur() samples from it.
# Returns a numeric vector of length N + 1 whose first element is 0.
MX <- function(a, b, d, M, N) {
  # BUG FIX: the original passed the *global* `m` to udmeixner and ignored
  # the parameter M entirely; use the argument so the function is
  # self-contained. (All visible call sites pass `m` positionally, so
  # behavior at those call sites is unchanged.)
  distr <- udmeixner(a, b, d, M)  # Meixner(a, b, d, M) distribution
  gen <- pinvd.new(distr)         # polynomial interpolation of inverse CDF
  rdmMXgen <- ur(gen, N)          # N random Meixner draws
  # (removed: h <- Time/N, which was computed from a global and never used)
  X <- rep(0, N + 1)
  for (i in 1:N) {
    # NOTE(review): each point is X[1] + one draw, i.e. iid returns around
    # the origin, NOT a cumulative Levy path (that would be
    # X[i] + rdmMXgen[i]). Kept as-is pending confirmation of intent.
    X[i + 1] <- X[1] + rdmMXgen[i]
  }
  return(X)
}
replicate(10, MX(a, b, d, m, N)) %>%
quickplot()Show code
MX_paths <- matrix(nrow = nsim, ncol = N + 1) # fill the matrix with random paths that follow
for (i in 1:nsim) {
MX_paths[i, ] <- MX(a, b, d, m, N)
}
MX_paths %>%
t() %>%
format(x = , scientific = TRUE, digits = 3) %>%
datatable()Show code
# plot the Monte Carlo Simulation plot(MX_paths[1, ], col = 0, type = 'l', ylim
# = c(min(MX_paths), max(MX_paths)), main = 'Monte Carlo Simulation for Meixner
# returns', sub = '100 steps, 10 paths', xlab = 'Time', ylab = 'MXNR returns' )
# for (i in 2:nsim) { lines(MX_paths[i, ], col = colori[i], lwd = 2) }
t(MX_paths)[1:10, ] %>%
quickplot(show_legend = F)Show code
lims <- c(min(MX_paths), max(MX_paths))
grid.arrange(ncol = 2, top = "Monte Carlo Simulation for Meixner model Returns Levy process",
quickplot(t(MX_paths), title = "", subtitle = paste("All", nsim, "simulations"),
show_legend = F) + ylim(lims), quickplot(apply(MX_paths, 2, mean), title = "Mean across timesteps",
show_legend = F) + ylim(lims))Show code
# QQplot
MX.q <- uq(pinvd.new(udmeixner(a, b, d, m)), p) # compute the quantile
plot(MX.q, l_ret.s, main = "Meixner Q-Q Plot", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")Show code
# good result, linear
# summary statistics
final_retMX <- MX_paths[, N + 1]
basicStats(final_retMX)| final_retMX | |
|---|---|
| nobs | 100.000000 |
| NAs | 0.000000 |
| Minimum | -0.433825 |
| Maximum | 0.502951 |
| 1. Quartile | -0.126952 |
| 3. Quartile | 0.138073 |
| Mean | 0.017252 |
| Median | -0.010423 |
| Sum | 1.725232 |
| SE Mean | 0.020809 |
| LCL Mean | -0.024038 |
| UCL Mean | 0.058543 |
| Variance | 0.043304 |
| Stdev | 0.208095 |
| Skewness | 0.354182 |
| Kurtosis | -0.342453 |
Show code
gghistogram(final_retMX, bins = 30, title = "Histogram of last time steps", subtitle = paste0("not much disclosing when nsim is small (now = ",
nsim, ")"))Heavy-tailed and skewed, making it ideal for modeling financial returns (captures volatility clustering and extreme events).
6.3.1 Meixner model stock prices
apply the same \(S_0 e^{rtMX}\) to get the stock prices
Show code
#FROM MEIXNER RETURNS TO STOCK PRICES
#function for stock price with Meixner returns
# Stock prices with Meixner-distributed returns: S(t) = S0 * exp(r*t + draw).
#
# a, b, d, m  Meixner parameters (scale, skewness, tail heaviness, location)
# N           number of time steps
# Time        horizon in years
# r           risk-free rate
# S0          initial stock price
#
# Returns a numeric vector of length N + 1 starting at S0.
MXexp <- function(a, b, d, m, N, Time, r, S0) {
  distr <- udmeixner(a, b, d, m)  # Meixner distribution (Runuran)
  gen <- pinvd.new(distr)         # polynomial interpolation of inverse CDF
  draws <- ur(gen, N)             # N random Meixner draws
  tgrid <- (0:N) * Time / N       # elapsed time at each grid point
  X <- rep(0, N + 1)
  X[1] <- S0
  for (i in 1:N) {
    # BUG FIX: the original wrote exp(r * Time + draws[i]) after Time had
    # been overwritten with the whole grid; recycling into a scalar slot
    # kept only Time[1] = 0, silently dropping the drift (with warnings).
    # Use the elapsed time tgrid[i + 1], matching the X[1]-based formula.
    # NOTE(review): each price uses a *single* draw, not cumsum(draws), so
    # this is not a cumulative Levy path — confirm intent before changing.
    X[i + 1] <- X[1] * exp(r * tgrid[i + 1] + draws[i])
  }
  return(X)
}
replicate(10,MXexp(a, b, d, m, N, Time, r, S0)) %>% quickplot()Show code
MXexp_paths<-matrix(nrow = nsim, ncol=N+1)
for(i in 1:nsim){
MXexp_paths[i,]<-MXexp(a,b,d,m,100,Time,r,S0) #vengono tutte le linee uguali perché MX non varia!
}
MXexp_paths %>% t() %>% round(2) %>% datatable()Show code
lims <- c(min(MXexp_paths), max(MXexp_paths))
grid.arrange(
ncol = 2, top = "Monte Carlo Simulation for Meixner exponential stock Levy process",
quickplot(t(MXexp_paths), title = "", subtitle = paste("All",nsim, "simulations"), show_legend = F)+
ylim(lims),
quickplot(apply(MXexp_paths, 2, mean), title = "Mean across timesteps", show_legend = F)+
ylim(lims)
)Show code
#statistics on final prices
final_pricesMX<-MXexp_paths[,N+1]
basicStats(data.frame(
MXexp_paths[, N - 4],
MXexp_paths[, N - 3],
MXexp_paths[, N - 2],
MXexp_paths[, N - 1],
MXexp_paths[, N],
MXexp_paths[, N + 1])) %>%
set_colnames(c("T-5", "T-4", "T-3", "T-2", "T-1", "T")) %>%
round(4) %>%
mutate(Statistics = rownames(basicStats(1)))%>% # just to extract the rownames
relocate(Statistics, .before = everything()) %>% # move this new column
gt() %>%
data_color(alpha = 0.2, columns = 2:7, rows = everything(), autocolor_text = F, direction = "row",
palette = c("red", "white", "green")) %>%
opt_stylize(5, "gray", F) %>%
tab_options(table.font.size = px(11)) %>%
tab_header("Meixner model last stock stock prices statistics")| Meixner model last stock stock prices statistics | ||||||
| Statistics | T-5 | T-4 | T-3 | T-2 | T-1 | T |
|---|---|---|---|---|---|---|
| nobs | 100.0000 | 100.0000 | 100.0000 | 100.0000 | 100.0000 | 100.0000 |
| NAs | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |
| Minimum | 127.7837 | 106.3109 | 118.7661 | 90.4394 | 127.3317 | 118.3932 |
| Maximum | 300.7634 | 287.0294 | 346.1954 | 324.0616 | 360.6678 | 310.4487 |
| 1. Quartile | 177.2899 | 176.5640 | 178.0976 | 170.5136 | 168.8760 | 176.4591 |
| 3. Quartile | 229.5995 | 234.1000 | 229.1251 | 222.9662 | 222.1397 | 217.2482 |
| Mean | 202.7674 | 200.9537 | 202.3592 | 200.6255 | 199.0466 | 198.2508 |
| Median | 198.8183 | 194.5433 | 198.1209 | 198.8265 | 195.9186 | 194.8980 |
| Sum | 20276.7436 | 20095.3739 | 20235.9162 | 20062.5549 | 19904.6621 | 19825.0814 |
| SE Mean | 3.5394 | 4.1925 | 4.3016 | 4.7034 | 4.1660 | 3.5169 |
| LCL Mean | 195.7445 | 192.6349 | 193.8238 | 191.2930 | 190.7803 | 191.2726 |
| UCL Mean | 209.7904 | 209.2725 | 210.8946 | 209.9581 | 207.3129 | 205.2291 |
| Variance | 1252.7416 | 1757.6913 | 1850.4152 | 2212.1841 | 1735.5744 | 1236.8448 |
| Stdev | 35.3941 | 41.9248 | 43.0165 | 47.0339 | 41.6602 | 35.1688 |
| Skewness | 0.2825 | -0.0222 | 0.3490 | 0.2572 | 0.7788 | 0.5558 |
| Kurtosis | -0.2996 | -0.6303 | 0.2456 | 0.1154 | 1.0655 | 0.5359 |
Show code
#risk neutral transform
rn_final_pricesMX<-S0*(final_pricesMX)*(exp(r*Time)/(mean(final_pricesMX)))
rn_final_pricesMX %>% t() %>% datatable()Show code
basicStats(rn_final_pricesMX)| rn_final_pricesMX | |
|---|---|
| nobs | 100.000000 |
| NAs | 0.000000 |
| Minimum | 122.736965 |
| Maximum | 321.838760 |
| 1. Quartile | 182.933166 |
| 3. Quartile | 225.218757 |
| Mean | 205.524413 |
| Median | 202.048600 |
| Sum | 20552.441278 |
| SE Mean | 3.645911 |
| LCL Mean | 198.290135 |
| UCL Mean | 212.758691 |
| Variance | 1329.266552 |
| Stdev | 36.459108 |
| Skewness | 0.555787 |
| Kurtosis | 0.535943 |
Show code
hist(rn_final_pricesMX)Show code
########payoff if you use the Esscher transform or the first mean correcting martingale
#payoff_MX <- pmax(final_pricesMX - K, 0)
#optprice_MX <- mean(payoff_MX)*exp(-r*Time)
#optprice_MX
#########payoff if you mean correct of the simulated paths
payoff_MX <- pmax(rn_final_pricesMX - K, 0)
optprice_MX <- mean(payoff_MX)*exp(-r*Time)
optprice_MX[1] 101.9437
7 American options
An American option has a higher price since it is evaluated at all times before expiration, and even an OTM option can become ITM with time and volatility. We price it using the Longstaff-Schwartz method.
This method is extremely easy to understand and implement. Remember that the objective is to estimate the value
\[ C = \max_{\tau} \mathbb{E}\left[ e^{-r\tau} \max(S_\tau - K, 0) \right] \]
Over all possible sets of times τ ≤ T. In practice this is only a finite set of times on a grid as all simulation schemes are by their nature discrete.
Core Challenges of American Options Pricing
Early Exercise: Unlike European options, American options can be exercised at any time before expiration, requiring dynamic optimization.
Curse of Dimensionality: Traditional methods (e.g., binomial trees) become computationally infeasible for high-dimensional problems due to exponential growth of paths (e.g., 100^3 simulations for 3 time steps).
Longstaff-Schwartz Method (LSM) Key Idea: Combines Monte Carlo simulation with backward induction and regression to estimate the optimal exercise strategy, get the payoff and pricing based on that .
Steps:
Simulate Paths: Generate \(N\) price paths under the risk-neutral measure.
Backward Induction: Start at expiration (t=T) and compute the immediate payoff.
Move backward to t=T−1:
Compare the immediate exercise value vs. the continuation value (estimated via regression on future discounted payoffs).
Decision: Exercise if immediate payoff > continuation value.
Repeat until t=0.
- Regression:
Fit a model (e.g., polynomial) to discounted future payoffs using in-the-money (ITM) paths.
Basis functions: \(X,X^2,X^3\) (flexible choice).
- Discount & Average:
Discount cash flows from optimal exercise times to t=0 and average across all paths.
Show code
## least squares method
# Longstaff-Schwartz least-squares Monte Carlo for an American PUT.
#
# n      number of simulated paths (doubled internally via antithetic variates)
# d      number of exercise dates on the time grid
# S0, K  spot and strike (prices are normalised by K internally)
# sigma  volatility, r risk-free rate, Time maturity in years
#
# Returns a one-row data.frame with usprice (American price), error
# (95% Monte Carlo half-width) and euprice (European price from the
# same terminal draws). Paths are simulated at maturity first, then
# walked BACKWARD by sampling earlier values conditionally on the
# endpoints (bridge-style), regressing continuation values on three
# basis functions over in-the-money paths.
LSM <- function(n, d, S0, K, sigma, r, Time) {
s0 <- S0/K
dt <- Time/d
z <- rnorm(n)
# terminal prices; second half uses -z (antithetic variates)
s.Time <- s0 * exp((r - 1/2 * sigma^2) * Time + sigma * z * (Time^0.5))
s.Time[(n + 1):(2 * n)] <- s0 * exp((r - 1/2 * sigma^2) * Time - sigma * z *
(Time^0.5))
# put payoff in normalised units (strike = 1)
CC <- pmax(1 - s.Time, 0)
# European price: discounted average over each antithetic pair, rescaled by K
payoffeu <- exp(-r * Time) * (CC[1:n] + CC[(n + 1):(2 * n)])/2 * K
euprice <- mean(payoffeu)
# backward induction over exercise dates d-1, ..., 1
for (k in (d - 1):1) {
z <- rnorm(n)
# NOTE: `mean` shadows base::mean inside the loop — conditional log-mean
# given s0 and the (later) value s.Time; presumably a Brownian-bridge
# style backward draw — TODO confirm against the course notes
mean <- (log(s0) + k * log(s.Time[1:n]))/(k + 1)
vol <- (k * dt/(k + 1))^0.5 * z
s.Time.1 <- exp(mean + sigma * vol)
# antithetic half re-uses -z through subtraction below
mean <- (log(s0) + k * log(s.Time[(n + 1):(2 * n)]))/(k + 1)
s.Time.1[(n + 1):(2 * n)] <- exp(mean - sigma * vol)
# immediate-exercise value at date k
CE <- pmax(1 - s.Time.1, 0)
# regression uses only in-the-money paths
idx <- (1:(2 * n))[CE > 0]
discountedCC <- CC[idx] * exp(-r * dt)
## three basis function x^0 x^1 and x^2
basis1 <- exp(-s.Time.1[idx]/2)
basis2 <- basis1 * (1 - s.Time.1[idx])
basis3 <- basis1 * (1 - 2 * s.Time.1[idx] + (s.Time.1[idx]^2)/2)
p <- lm(discountedCC ~ basis1 + basis2 + basis3)$coefficients
estimatedCC <- p[1] + p[2] * basis1 + p[3] * basis2 + p[4] * basis3 ## cont region
# exercise flag: 1 where immediate exercise beats estimated continuation
EF <- rep(0, 2 * n)
EF[idx] <- (CE[idx] > estimatedCC) ## compare the payoff with cont
# carry forward: discount continuations, replace with payoff where exercised
CC <- (EF == 0) * CC * exp(-r * dt) + (EF == 1) * CE
s.Time <- s.Time.1
# print(z) print(mean) print(vol) print(s.Time.1) print(mean)
# print(s.Time.1) print(CE) print(idx) print(discountedCC)
# print(basis1) print(basis2) print(basis3) print(p) print(estimatedCC)
# print(EF) print(EF) print(CC) print(s.Time)
}
# final discount to t = 0 and average over antithetic pairs
payoff <- exp(-r * dt) * (CC[1:n] + CC[(n + 1):(2 * n)])/2
usprice <- mean(payoff * K)
error <- 1.96 * sd(payoff * K)/sqrt(n)
# early-exercise premium; computed but not returned (kept for reference)
earlyex <- usprice - euprice
data.frame(usprice, error, euprice)
}
S0 <- 135
K <- 135
Time <- 1
r <- 0.05
sigma <- 0.4
LSM(1e+06, 3, S0, K, sigma, r, Time)| usprice | error | euprice |
|---|---|---|
| 18.18593 | 0.0158393 | 17.73897 |
| Method | Pros | Cons |
|---|---|---|
| Binomial Tree | Exact for 1D, handles early exercise. | Exponential complexity in high dimensions. |
| Finite Difference | PDE-based, precise for low dimensions. | Hard to extend beyond 2-3 dimensions. |
| LSM | Handles high dimensions, easy to implement. | Approximation error, depends on regression. |
8 Fraone
8.1 Black-Scholes model: Moving beyond the normality of returns
A key assumption of the Black-Scholes model is that the underlying asset follows a Geometric Brownian Motion process. This implies that returns are normally distributed and, in turn, the underlying price is log-normally distributed.
Show code
grid.arrange(ncol = 2, quickplot(dnorm((-400:400)/100), title = "Theoretical return distribution",
show_legend = F, xlab = NULL, ylab = NULL, x_size = 100, x_start = -4), quickplot(dlnorm(seq(0.01,
5, length.out = 500), meanlog = 0, sdlog = 0.7), title = "Theoretical underlying price distribution",
show_legend = F, xlab = NULL, ylab = NULL, x_size = 100, x_start = 0))From a logical standpoint, pricing an option is all about correctly assessing the probability of the option to end up in the money (St>K) and correctly quantifying the expected value of the future payoff.
The complexity of option pricing is not the evaluation of the expected value of the payoff itself. In fact, the crucial part is the assumption regarding the future distribution of the underlying asset. If we get this wrong, then our pricing will be incorrect.
Show code
x <- dlnorm(seq(0.01, 5, length.out = 500), meanlog = 0, sdlog = 0.7)
quickplot(x, title = "Theoretical underlying price distribution", xlab = NULL, ylab = NULL,
x_size = 100, x_start = 0) + geom_vline(aes(xintercept = 2)) + geom_ribbon(aes(xmin = 0,
xmax = 2, color = NULL, fill = "OTM"), alpha = 0.1) + geom_ribbon(aes(xmin = 2,
xmax = 5, color = NULL, fill = "ITM"), alpha = 0.1) + scale_fill_manual("Moneyness",
values = c(OTM = "lightblue", ITM = "indianred")) + guides(color = "none")Option pricing is all about making an assumption regarding the future probability distribution of the underlying. This assumption mainly depends on two factors: the type of distribution chosen and its parameters.
Under Black-Scholes, the type of distribution of the underlying returns is the normal distribution. Under the assumption of normal distribution, the only parameter required is the standard deviation of returns (σ). This is an input to the Black-Scholes model.
When you estimate σ, regardless of the method you use (e.g. Historical, MLE, QuasiMLE, etc.), you are implicitly assuming that the calibrated σ will be a good representation of the standard deviation of returns in the future (i.e. the life of the option).
At this point it is fair to wonder whether the assumption of normality for the underlying returns is appropriate.
8.1.1 Stock indices
We start with the main equity indexes, namely the S&P500, Nasdaq and Dow Jones. Specifically, we will be using the most traded ETFs for these indexes: SPY, QQQ and DIA respectively. The results would be virtually identical if we used the future contracts for each index instead of their ETFs.
Show code
fraoneshow("SPY", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2024))Show code
fraoneshow("QQQ", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2024))Show code
fraoneshow("DIA", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2024))The three indexes analysed above offer an aggregate view on the whole equity market, however they are comprised of several stocks from different sectors. While it is interesting to observe the empirical behaviour of the entire market, it can’t be ignored that each sector and each stock within a sector have their own behaviour. Although the empirical distribution of returns will not vary majorly across sectors or individual stocks, it is important to remember that there are differences.
As an example, we have extended the analysis done on the three equity indexes to the ETFs of some major equity sectors, specifically:
\(\sigma\) is important: the price goes up by the stairs and comes down by the elevator.
A pronounced left tail means that the BS fail since your empirical distribution changes heavily in that point which is important for pricing
Financials (XLF - Financial Select Sector SPDR)
Consumer staples (XLP - Consumer Staples Select Sector SPDR).
Show code
fraoneshow("XLF", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2021))Show code
fraoneshow("SLP", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2021))8.1.2 Bonds
Similarly, we’ll now extend the analysis beyond equities and we’ll move into different asset classes. The same analysis is presented here below regarding the bond market. For the sake of this example we are looking at the TLT ETF (iShares 20+ Year Treasury Bond ETF) which is a very liquid and actively traded ETF on the US bond market.
Show code
fraoneshow("TLT", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2021))8.1.3 Single stocks
Earlier we have observed the distributions of equity indices and sector ETFs, which represent a broad basket of stocks. This provides a view on the behaviour of the overall market, however each stock has unique features. Below we will look at the empirical distributions of some individual stocks, from different sectors. For the purpose of this analysis, we will look at the following:
GME and AMC, two of the so called ‘meme stocks’, which became popular in 2021, when they recorded major spikes following short squeezes
CCL and CSCO, from the space of leisure and telecom sectors
SE and Z, two tech stocks which have gained popularity from 2020 onwards
8.1.3.1 Gamestop
There is a massive difference between the 2021 movements — with the WallStreetBets subreddit pushing prices up — and the same period in 2020.
Show code
fraoneshow("GME", from = Qdate(1, 1, 2020), to = Qdate(1, 6, 2020))Show code
fraoneshow("GME", from = Qdate(1, 1, 2021), to = Qdate(1, 6, 2021))8.1.3.2 AMC Entertainment
Show code
fraoneshow("AMC", from = Qdate(1, 1, 2020), to = Qdate(1, 6, 2020))Show code
fraoneshow("AMC", from = Qdate(1, 1, 2021), to = Qdate(1, 6, 2021))8.1.3.3 CCL carrnival cruises
During COVID they could not operate and still had massive maintenance costs.
Show code
fraoneshow("CCL", from = Qdate(1, 1, 2018), to = Qdate(1, 6, 2019))Show code
fraoneshow("CCL", from = Qdate(1, 1, 2020), to = Qdate(1, 6, 2021))8.1.3.4 Zeta
single tech stock with a couple of bull runs happening quickly
Show code
fraoneshow("Z", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2021))We have observed the features of the empirical distributions of several assets, from different asset classes. All these show some evidence of non-normality of returns, as we have discussed. There is also another feature to bear in mind. What observed in the charts so far is not static. Any financial asset may display a changing behaviour, based on the period we are looking at. As observed so far, assuming normality of returns may lead to inaccurate pricing. The degree of such inaccuracy may be different based on the period we are looking at. In order to provide an example, we will now look at the distributions of two of the assets analysed earlier, after restricting the period of the time series. The charts will highlight the differences in the empirical distributions compared to the ones of the longer period analysed earlier.
8.1.4 Commodities
Commodities offer different insights regarding the empirical distribution Moving to commodities, the same analysis is presented here below for gold. We are looking at the GLD ETF (SPDR Gold Trust ETF) which is one of the most traded ETF on gold.
Show code
fraoneshow("GLD", from = Qdate(1, 1, 2018), to = Qdate(1, 1, 2021))8.1.5 OIL
Looking at energy commodities, we have analysed a major Oil ETF (USO) and a future contract on TTF natural gas, which is the main gas market in Europe. We will shortly see that the thesis of non normality of returns is confirmed in this case as well, although these two commodities offer different insights regarding the empirical distribution. In particular, it is interesting to observe the behaviour of returns for these commodities, which are very different from all other assets analysed so far. This reflects the nature of these markets which are strongly driven by their fundamental supply/demand dynamics. Oil shows climate changes like cold spells and wars on the right tail supply and demand dynamics seen in the tails
Show code
fraoneshow("USO", from = Qdate(1, 1, 2020), to = Qdate(1, 6, 2020))Show code
fraoneshow("USO", from = Qdate(1, 1, 2021), to = Qdate(1, 6, 2021))We have analysed the empirical distribution of returns for a number of instruments across different asset classes and we have confirmed that it is rare to find normally distributed returns.
What are the implications in terms of option pricing?
Think about what we discussed earlier and the importance of the assumption of the underlying returns distribution. In short, pricing under the Black-Scholes model, which implicitely assumes normality of returns, may lead to inaccurate option pricing! Does it mean that Black-Scholes should not be used? Absolutely not! Despite its limitations, Black-Scholes offers a number of advantages, such as mathematical tractability, ease of use and a closed formula solution for pricing and greeks.
In fact, Black-Scholes is still the most popular model used for option pricing in the industry.
8.1.6 Conclusions
Drop the assumption of normality of returns (i.e. Geometric brownian motion)and use more advanced stochastic processes instead. Later in the course you will study a number of different stochastic processes, including Lévy processes. Such processes typically have more parameters and allow to better represent kurtosis and skewness observed in the empirical distributions of returns.
Continue using the Black-Scholes model, although with the introduction of some adjustments to the σ used as input, as normally done by market participants in the industry. As a result of this process, in practice we will observe the phenomenon of the volatility smile (also known as volatility skew).
8.2 Black-Scholes model: Implied volatility
8.2.1 Implied volatilty
Implied volatility is the volatility implied by the market price ("the market is always right"). Instead of seeing the option as \(P=f(S,K,r,t,\sigma)\), you rewrite the function as \(\sigma=f(S,K,r,t,P)\). You cannot do this analytically, so it has to be done numerically.
If traded option prices truly reflected the model — all market operators using the same GBM assumption — you should always see the same \(\sigma\) coming out of the procedure; in reality you do not, even though in the model \(\sigma\) should not be a function of anything else.
when you do this with market prices and you get the implied volatility you will not have the flat shape of the implied volatility, so returns are not normal and the GBM is not the correct way to visualize prices as that is the only parameter to change as the rest is observable and fixed
Show code
import pandas as pd
import os
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from scipy.optimize import minimize
from math import inf
import numpy as np
from scipy.stats import norm
# Black-Scholes pricing function
def BS_option_price(S, K, T, r, sigma, option_type):
    """Black-Scholes price of a European option.

    Parameters
    ----------
    S, K : float        spot and strike price
    T : float           time to expiry in years
    r : float           continuously-compounded risk-free rate
    sigma : float       volatility of returns
    option_type : str   'call' or 'put'

    Returns the option price; raises ValueError for any other type.
    """
    sqrt_t = np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * sqrt_t)
    d2 = d1 - sigma * sqrt_t
    if option_type == 'call':
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    if option_type == 'put':
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)
    raise ValueError("Invalid option type. Please define 'call' or 'put'.")
# Implied volatility function
def implied_volatility(option_mkt_price, S, K, T, r, option_type, initial_guess):
    """Back out the Black-Scholes implied volatility from a market price.

    Minimizes |BS model price - market price| over sigma using the
    derivative-free Nelder-Mead method of scipy.optimize.minimize.

    Returns the implied volatility as a scalar float.
    """
    result = minimize(objective_function, initial_guess,
                      args=(option_mkt_price, S, K, T, r, option_type),
                      method='Nelder-Mead')
    # BUG FIX: result.x is a length-1 ndarray; the comment said "only take
    # the first element" but the code returned the whole array. Return the
    # scalar, consistent with the later redefinition of this function.
    return result.x[0]
## inside implied volatility
# Objective function
def objective_function(sigma, option_mkt_price, S, K, T, r, option_type):
    """Absolute pricing error between the BS model and the market quote."""
    model_price = BS_option_price(S, K, T, r, sigma, option_type)
    return abs(model_price - option_mkt_price)
# Inputs
S0 = 154.73
timestep = 1/252
K = 155
T = 33 * timestep
r = 0.05
option_type='call'
initial_guess = 0.2
option_mkt_price = 7.1
# Implied volatility function
def implied_volatility(option_mkt_price, S, K, T, r, option_type, initial_guess):
    """Back out the Black-Scholes implied volatility from a market price.

    Minimizes |BS model price - market price| over sigma with the
    derivative-free Nelder-Mead method from scipy.optimize.
    Returns the implied volatility as a scalar float.
    """
    fit = minimize(
        objective_function,
        initial_guess,
        args=(option_mkt_price, S, K, T, r, option_type),
        method='Nelder-Mead',
    )
    # Nelder-Mead returns a length-1 ndarray; take the scalar element
    return fit.x[0]
# Output
implied_vol = implied_volatility(option_mkt_price, S0, K, T, r, option_type, initial_guess)
print(implied_vol)0.3018261718750004
8.3 Black-Scholes model: Volatility smile
backing out the volatilty as a function of the price on the market not possible to do in a closed form solution
we can still solve it numerically and obtain the value of \(σ\), which is typically called implied volatility, given that it is the volatility implied by real market option prices.
If you repeat this process for all the available strike prices for the same option, you will realise that the value of the implied volatility that you obtain changes every time. To do this we use scipy which is a library that allows for a lot of statistical analysis like minimizing the objective function inside implied volatility you need the objective function as an argument which is the absolute value of the subtraction of the Black and Scholes option price and the actual option market price, and to get the smile we just iterate it over the whole option chain
In theory, if the Black-Scholes assumption of normality of returns was correct, we should get always the same σ, regardless of the strike price or underlying price. This is simply due to the model specification of Black-Scholes. The parameter \(σ\) is meant to be a constant in the Black-Scholes model, not a function of the strike price or underlying price.
However, if we get a different σ depending on the strike or underlying price of the option, this implies that σ is not a constant
Show code
# Load the option chain (strike, market price, option type) from a local Excel file
from pathlib import Path
file_path = Path("C:/Users/pietr/OneDrive/Documenti/aCATTOLICA ANNO 24-25/2 SEMESTRE/(ASF) - Applied Statistics for Finance/(ASF) - Python/Options_input.xlsx")
df_option_data = pd.read_excel(file_path)
df_option_data Strike price Option price Option type
0 110 0.32 put
1 120 0.77 put
2 130 1.55 put
3 140 2.90 put
4 150 5.30 put
5 160 4.60 call
6 170 1.57 call
7 180 0.47 call
8 190 0.18 call
9 200 0.08 call
Show code
## Compute the volatility smile: method 1
# Pre-fill an 'Implied vol' column; the values are computed row by row below
df_option_data['Implied vol'] = float('nan')
# Loop through df_option_data rows, compute the implied volatility for each
# option and store it in the 'Implied vol' column.
# FIX: use .loc instead of the original chained indexing
# (df_option_data['Implied vol'][opt] = ...), which raises
# SettingWithCopyWarning and is not guaranteed to write into the DataFrame
# under pandas copy-on-write semantics.
for opt in range(df_option_data.shape[0]):
    df_option_data.loc[opt, 'Implied vol'] = implied_volatility(
        option_mkt_price=df_option_data['Option price'][opt],
        S=S0, r=r, T=T,
        initial_guess=initial_guess,
        K=df_option_data['Strike price'][opt],
        option_type=df_option_data['Option type'][opt],
    )
# Plot the volatility smile
# Implied volatility as a function of strike for the whole chain
plt.plot(df_option_data['Strike price'], df_option_data['Implied vol'])
plt.xlabel('Strike price')
plt.ylabel('Implied Volatility')
plt.title('Volatility smile')
plt.show()
We can also use a lambda function, which is more efficient, and obtain the same results.
Show code
# Method 2: row-wise apply (axis=1) with a lambda — same values as the loop above
df_option_data['Implied vol_2'] = df_option_data.apply(lambda row: implied_volatility(row['Option price'],
    S0, row['Strike price'], T, r,
    row['Option type'], initial_guess), axis=1)
df_option_data Strike price Option price Option type Implied vol Implied vol_2
0 110 0.32 put 0.523438 0.523438
1 120 0.77 put 0.492578 0.492578
2 130 1.55 put 0.450332 0.450332
3 140 2.90 put 0.403271 0.403271
4 150 5.30 put 0.357197 0.357197
5 160 4.60 call 0.287002 0.287002
6 170 1.57 call 0.268320 0.268320
7 180 0.47 call 0.266641 0.266641
8 190 0.18 call 0.284688 0.284688
9 200 0.08 call 0.305938 0.305938
8.4 The Volatility Index (VIX)
Show code
# Pull closing prices for the VIX index and the SPY ETF
# (get_portfolio / rebase / quickplot are project helpers defined elsewhere)
VIXSPY <- get_portfolio(c("^VIX", "SPY"), fun = Cl, clean_names = T)
# Rebase both series to a common starting value and plot them together
VIXSPY %>%
  rebase() %>%
  quickplot()Show code
# Correlation heatmap of continuous (log) returns between SPY and VIX.
# NOTE(review): "volatilty" typo in the chart title below is in the source.
ggcorrplot(cor(ROC(VIXSPY, type = "cont"), use = "na.or.complete"), title = "Correlation between returns and volatilty",
  lab = TRUE, lab_size = 10, legend.title = "")8.5 Option pricing and Liquidity
An important assumption widely made in option pricing is perfect liquidity. This assumption is generally made for any pricing model, not only for the Black-Scholes model
In practice any asset typically has two prices: the bid price and the ask price
Two things can happen: either the bid and ask are close enough that the choice makes no practical difference, or the spread is wide enough to change the results.
you can do both bid-ask prices since the “reliable correct price” is somewhere in the middle
take the midpoint as a proxy
or take the last price traded if its sufficiently recent
and since liquidity of the option market is much lower than the liquidity of the market for the underlying you really feel it
option chains hide the liquidity since most of it is going to be at the ATM points
time also plays a major role, even on the SP500 you go from a BA spread of 5 cents in the 54 days maturity chain to 3-5 dollars BA, all of this at the ATM
Lastly, you also need to consider the overall size of the market: smaller-market-cap stocks have far fewer contracts being traded. Interestingly, there is much less spread at deep ITM options than at ATM, since people are just putting the fishing line in and waiting; and there are often no bids at all for OTM options.
For stocks like this, it is best to think in ranges.
this chart is for the SPY ETF with volatility smiles visualized with the bid, ask and the midpoint prices, red is puts blue is calls,
this is a great way to visualize why we use OTM puts and calls to visualize volatility smiles, OTM options are more liquid so there is much less difference ITM calls at the very left are way less liquid and so there is more difference in less liquid
since there is less protection and a more linear relation btw underlying and option price
in general you can see that taking the midpoint is good as it catches pretty well the “good” OTM sides of the volatility skew
Show code
# implied volatility
# Closed-form Black-Scholes price of a European call or put.
# S: spot, K: strike, r: risk-free rate, T: years to expiry, sigma: volatility.
bs_price <- function(S, K, r, T, sigma, type = "call") {
  sig_sqrt_t <- sigma * sqrt(T)
  d1 <- (log(S / K) + (r + 0.5 * sigma^2) * T) / sig_sqrt_t
  d2 <- d1 - sig_sqrt_t
  discounted_strike <- K * exp(-r * T)
  if (type == "call") {
    S * pnorm(d1) - discounted_strike * pnorm(d2)
  } else if (type == "put") {
    discounted_strike * pnorm(-d2) - S * pnorm(-d1)
  } else {
    stop("Option type must be 'call' or 'put'")
  }
}
# Implied volatility: the sigma whose Black-Scholes price matches the market
# price, found by minimising the squared pricing error over (1e-6, 5).
implied_vol <- function(market_price, S, K, r, T, type = "call") {
  pricing_error_sq <- function(sigma) {
    (bs_price(S, K, r, T, sigma, type) - market_price)^2
  }
  fit <- optimize(pricing_error_sq, interval = c(1e-06, 5), tol = 1e-08)
  fit$minimum
}
# Time to maturity in years: calendar days between trade date and expiry / 365
T_left <- as.numeric(Qdate(19, 12, 2025) - Qdate(27, 5, 2025))/365
# Underlying spot: SPY close on 29 Apr 2025 (single-day pull via quantmod)
market_price <- as.numeric(Cl(getSymbols(Symbols = "SPY", auto.assign = FALSE, from = Qdate(29,
  4, 2025), to = Qdate(30, 4, 2025))))
# Local path to the downloaded SPY option chain (CALLS / PUTS sheets)
dir <- "C:\\Users\\pietr\\OneDrive\\Documenti\\aCATTOLICA ANNO 24-25\\2 SEMESTRE\\(ASF) - Applied Statistics for Finance\\(ASF) - R\\ASF\\SPY chain.xlsx"
## calls
# Read the call side and attach the common pricing inputs as columns
data_calls <- readxl::read_xlsx(dir, sheet = "CALLS") %>%
  mutate(type = "call", S = market_price, r = r, T = T_left)
# Back out one implied vol per strike from the last traded price
data_calls$implied_vol <- mapply(FUN = implied_vol, market_price = data_calls$`Last Price`,
  S = data_calls$S, K = data_calls$Strike, r = r, T = T_left, type = data_calls$type)
## puts
data_puts <- readxl::read_xlsx(dir, sheet = "PUTS") %>%
  mutate(type = "put", S = market_price, r = r, T = T_left)
data_puts$implied_vol <- mapply(FUN = implied_vol, market_price = data_puts$`Last Price`,
  S = data_puts$S, K = data_puts$Strike, r = r, T = T_left, type = data_puts$type)
# Smile plot: OTM points drawn as crosses (shape 4), others as solid dots (19)
ggplot() + geom_point(data = data_calls, size = 2, aes(x = Strike, implied_vol, color = "CALLS"),
  shape = ifelse(data_calls$Moneyness == "OTM", 4, 19)) + geom_point(data = data_puts,
  aes(x = Strike, implied_vol, color = "PUTS"), shape = ifelse(data_puts$Moneyness ==
    "OTM", 4, 19)) + theme_minimal()8.6 Greeks and Approximations in Option Pricing
Understand the Black-Scholes Greeks (Delta, Vega, Theta, Rho, Gamma)
Use them to approximate option price changes under small shocks
Compare these approximations to exact re-pricing using the Black-Scholes (BS) formula
When linear approximations are sufficient, and when they break down?
Show code
# Black-Scholes Greeks function
def BS_greeks(S, K, T, r, sigma, option_type, rounding = inf):
    """Closed-form Black-Scholes Greeks for a European option.

    Parameters:
        S, K        : spot and strike price
        T           : time to expiry in years
        r           : risk-free rate (continuous compounding)
        sigma       : volatility
        option_type : 'call' or 'put'
        rounding    : decimals to round each Greek to; default `inf` = no rounding

    Returns a dict with keys 'delta', 'gamma', 'theta', 'vega', 'rho'.
    Raises ValueError for any option_type other than 'call'/'put'.
    """
    # Validate once up front. (The original raised separately inside the delta
    # and theta branches and not at all in the rho branch; behavior for valid
    # and invalid inputs is unchanged.)
    if option_type not in ('call', 'put'):
        raise ValueError("Invalid option type. Please define 'call' or 'put'.")
    sqrt_T = np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * sqrt_T)
    d2 = d1 - sigma * sqrt_T
    # Gamma and vega are identical for calls and puts
    gamma = norm.pdf(d1) / (S * sigma * sqrt_T)
    vega = S * sqrt_T * norm.pdf(d1)
    if option_type == 'call':
        delta = norm.cdf(d1)
        theta = -((S * norm.pdf(d1) * sigma) / (2 * sqrt_T)) - r * K * np.exp(-r * T) * norm.cdf(d2)
        rho = K * T * np.exp(-r * T) * norm.cdf(d2)
    else:  # put
        delta = norm.cdf(d1) - 1
        theta = -((S * norm.pdf(d1) * sigma) / (2 * sqrt_T)) + r * K * np.exp(-r * T) * norm.cdf(-d2)
        rho = -K * T * np.exp(-r * T) * norm.cdf(-d2)
    greeks = {'delta': delta, 'gamma': gamma, 'theta': theta, 'vega': vega, 'rho': rho}
    if rounding != inf:
        # Round every Greek to the requested number of decimals
        greeks = {name: round(value, rounding) for name, value in greeks.items()}
    return greeks
def compare_greek_approximation(shock, greek_name, repriced_value):
    """Compare a first-order Greek approximation against full repricing.

    Reads the module-level `greeks` dict and `option_mkt_price` set earlier in
    the script. Prints the shock, the linear estimate
    (base price + greek * shock), the exact repriced value, and the absolute
    error between the two.
    """
    greek = greeks[greek_name]
    # First-order (linear) estimate of the price change
    approx_change = greek * shock
    approx_price = option_mkt_price + approx_change
    # Gap between the linear estimate and exact Black-Scholes repricing
    abs_error = abs(approx_price - repriced_value)
    print(f"\n--- {greek_name.upper()} ---")
    print(f"Shock: {shock}")
    print(f"{greek_name.capitalize()} * Shock: {approx_change:.4f}")
    print(f"Approximated price: {approx_price:.4f}")
    print(f"Repriced value: {repriced_value:.4f}")
    print(f"Absolute error: {abs_error:.4f}") 8.7 Closed-Form Greeks from Black-Scholes
We calculate Greeks analytically using the BS model. Then:
- Apply a shock to one input (e.g., underlying price \(S_0\), volatility \(\sigma\), time to maturity \(T\), or interest rate \(r\))
- Use the Greek to linearly approximate the new price
- Compare to the new price from re-running the full BS formula
This approach is practical for small changes. For example:
If Delta = 0.4, then for every €1 increase in the underlying, the option price increases by €0.40
This linear approximation works well for small shocks, but fails to capture curvature for large shocks (where higher-order Greeks matter)
8.7.1 Why use first-order approximations?
- Faster: No need to re-run full pricing model
- Simple intuition: Traders can quickly estimate P&L
- Higher-order terms (Gamma, Vomma, etc.) are more accurate but computationally heavier and less intuitive
Show code
# Calculate implied vol and Greeks
# Sanity check: implied vol recovered from the market price (output inline below)
print(implied_volatility(option_mkt_price, S0, K, T, r, option_type, initial_guess))0.3018261718750004
Show code
# All five Greeks for an ATM 1y call at r = 1%, rounded to 4 decimals
print(BS_greeks(100, 100, 1, 0.01, 0.2, "call", rounding = 4)){'delta': np.float64(0.5596), 'gamma': np.float64(0.0197), 'theta': np.float64(-4.4201), 'vega': np.float64(39.4479), 'rho': np.float64(47.5285)}
8.8 Greek-by-Greek Breakdown
8.8.1 Delta
Measures sensitivity of the option price to changes in \(S_0\)
First-order (linear) approximation:
\[ \Delta \cdot \Delta S \]
Show code
# Parameters
S0 = 154.73  # spot price of the underlying
timestep = 1 / 252  # one trading day, in years
K = 155.0  # strike
T = 33 * timestep  # 33 trading days to expiry
r = 0.05  # risk-free rate
option_type = 'call'
initial_guess = 0.2  # starting sigma for the implied-vol search
option_mkt_price = 7.1  # observed market price
# Back out sigma from the market price, then evaluate all Greeks at that sigma
implied_vol = implied_volatility(option_mkt_price, S0, K, T, r, option_type, initial_guess)
greeks = BS_greeks(S0, K, T, r, implied_vol, option_type)
# Positive Delta Shock
# Reprice exactly at S0 + 1 and compare with the Delta * shock linear estimate
S0_shock = 1
repriced = BS_option_price(S0 + S0_shock, K, T, r, implied_vol, option_type)
compare_greek_approximation(S0_shock, 'delta', repriced)
--- DELTA ---
Shock: 1
Delta * Shock: 0.5393
Approximated price: 7.6393
Repriced value: 7.6509
Absolute error: 0.0116
Show code
# Negative Delta Shock
# Same comparison for a -1 move in the underlying
S0_shock = -1
repriced = BS_option_price(S0 + S0_shock, K, T, r, implied_vol, option_type)
compare_greek_approximation(S0_shock, 'delta', repriced)
--- DELTA ---
Shock: -1
Delta * Shock: -0.5393
Approximated price: 6.5607
Repriced value: 6.5724
Absolute error: 0.0117
Works well for small \(\Delta S\)
For larger shocks, Delta itself changes (Gamma effect), and approximation becomes poor
Example: Apply a ±1 unit shock to \(S_0\), compute change via Delta vs exact BS price → small difference = good linear approximation
8.8.2 Gamma
Measures the rate of change of Delta with respect to \(S_0\)
Captures curvature: needed when \(\Delta S\) is large
Quadratic approximation:
\[ \text{Change in price} \approx \Delta \cdot \Delta S + \frac{1}{2} \Gamma \cdot (\Delta S)^2 \]
For large shocks, relying only on Delta may result in nonsensical outputs (e.g., Delta > 1)
Gamma corrects this, but it’s still local and becomes unreliable with very large shocks
8.8.3 Vega
Sensitivity to volatility \(\sigma\)
Show code
# Vega: bump implied vol by +1 vol point, reprice exactly, compare with Vega * shock
vol_shock = 0.01
repriced = BS_option_price(S0, K, T, r, implied_vol + vol_shock, option_type)
compare_greek_approximation(vol_shock, 'vega', repriced)
--- VEGA ---
Shock: 0.01
Vega * Shock: 0.2223
Approximated price: 7.3223
Repriced value: 7.3222
Absolute error: 0.0001
Show code
# Vega with the sign flipped: implied vol down 1 vol point
vol_shock = -0.01
repriced = BS_option_price(S0, K, T, r, implied_vol + vol_shock, option_type)
compare_greek_approximation(vol_shock, 'vega', repriced)
--- VEGA ---
Shock: -0.01
Vega * Shock: -0.2223
Approximated price: 6.8777
Repriced value: 6.8776
Absolute error: 0.0001
Approximation:
\[ \text{Change in price} \approx \text{Vega} \cdot \Delta \sigma \]
- Good for small volatility changes
-
if Vega ≈ 0.22, and implied vol increases by 1%, price rises by approx €0.22
8.8.4 Theta
Sensitivity to passage of time (time decay)
Common time step: \(\Delta t = \frac{1}{252}\) (1 trading day)
Time decay is non-linear: as expiry nears, the erosion accelerates
Show code
# Theta: remove 30 trading days at once — a large shock, so the linear
# approximation is expected to be poor (time decay is non-linear)
time_shock = timestep * 30
repriced = BS_option_price(S0, K, T - time_shock, r, implied_vol, option_type)
compare_greek_approximation(time_shock, 'theta', repriced)
--- THETA ---
Shock: 0.11904761904761904
Theta * Shock: -3.5042
Approximated price: 3.5958
Repriced value: 1.9463
Absolute error: 1.6496
Show code
# Theta with the sign flipped: add 30 trading days (T - time_shock grows)
time_shock = -timestep * 30
repriced = BS_option_price(S0, K, T - time_shock, r, implied_vol, option_type)
compare_greek_approximation(time_shock, 'theta', repriced)
--- THETA ---
Shock: -0.11904761904761904
Theta * Shock: 3.5042
Approximated price: 10.6042
Repriced value: 10.1082
Absolute error: 0.4960
Approximation gets worse if you apply large shocks (e.g., remove 30 days at once)
8.8.5 Rho
Sensitivity to interest rate \(r\)
Typically neglected, but not always negligible
Shock usually ±1% (since rates are in %)
Example: a 1% increase in \(r\) might change option value by €0.09
Rho is often called the “black sheep” of the Greeks — less relevant in low-rate environments, but still useful in theory
Show code
# Rho: shock the risk-free rate by +1 percentage point
r_shock = 0.01
repriced = BS_option_price(S0, K, T, r + r_shock, implied_vol, option_type)
compare_greek_approximation(r_shock, 'rho', repriced)
--- RHO ---
Shock: 0.01
Rho * Shock: 0.1000
Approximated price: 7.2000
Repriced value: 7.2003
Absolute error: 0.0003
8.9 Summary of Closed-Form Greek Use
- All Greeks are derived from the BS formula
- They serve as linear (or quadratic) approximations
- Valid mostly for small changes
- For larger shocks → better to use the full BS formula (or include higher-order Greeks)
8.10 Numerical Approximations
What happens when you move beyond vanilla options, or pricing models without closed-form solutions?
You can still calculate Greeks numerically using:
\[ \text{Greek} \approx \frac{V(x + \varepsilon) - V(x - \varepsilon)}{2\varepsilon} \]
Where:
\(x\) is the input variable (e.g., \(S_0, \sigma, T\)) \(\varepsilon\) is a small shift
This is a finite difference approach: generalizable and useful when analytical Greeks are unavailable
This method:
Works for any option (even exotic or path-dependent) Still assumes that the function is smooth and approximable locally
Think of it as a thought experiment even in BS world — it becomes essential when moving into models where closed-form expressions aren’t available.
8.11 Final Thoughts
Greeks give powerful local sensitivities
Good for quick P&L estimates, hedging, and risk management
Linear approximations break down with large shocks
When needed: go full re-pricing or use higher-order Greeks
Numerical methods allow Greek calculation for any product, but require careful tuning
Show code
# Finite-difference (numerical) Greeks, to compare against the closed forms.
# Shock sizes for each input
S0_shock = 1
sigma_shock = 0.01
T_shock = timestep # one trading day
r_shock = 0.01
# Baseline price at the unshocked inputs
base_price = BS_option_price(S0, K, T, r, implied_vol, option_type)
# Delta (central difference)
f_plus = BS_option_price(S0 + S0_shock, K, T, r, implied_vol, option_type)
f_minus = BS_option_price(S0 - S0_shock, K, T, r, implied_vol, option_type)
numerical_delta = (f_plus - f_minus) / (2 * S0_shock)
# Gamma (central second difference)
numerical_gamma = (f_plus - 2 * base_price + f_minus) / (S0_shock**2)
# Vega (central difference in volatility)
f_plus_vol = BS_option_price(S0, K, T, r, implied_vol + sigma_shock, option_type)
f_minus_vol = BS_option_price(S0, K, T, r, implied_vol - sigma_shock, option_type)
numerical_vega = (f_plus_vol - f_minus_vol) / (2 * sigma_shock)
# Theta (forward difference in time)
# (V(T - h) - V(T)) / h approximates -dV/dT, i.e. theta
f_minus_T = BS_option_price(S0, K, T - T_shock, r, implied_vol, option_type)
numerical_theta = (f_minus_T - base_price) / T_shock # time decay is negative
# Rho (central difference in rates)
f_plus_r = BS_option_price(S0, K, T, r + r_shock, implied_vol, option_type)
f_minus_r = BS_option_price(S0, K, T, r - r_shock, implied_vol, option_type)
numerical_rho = (f_plus_r - f_minus_r) / (2 * r_shock)
--- NUMERICAL GREEKS vs CLOSED-FORM ---
Delta: 0.5392 vs Closed-Form: 0.5393
Gamma: 0.023485 vs Closed-Form: 0.023492
Vega: 22.229498 vs Closed-Form: 22.229533
Theta: -29.634462 vs Closed-Form: -29.434990
Rho: 9.997053 vs Closed-Form: 9.997091
Show code
# Same numerical-delta exercise in R, using the project black_scholes() helper
S0 <- 154.73  # spot
timestep <- 1/252  # one trading day, in years
K <- 155  # strike
Time <- 33 * timestep  # 33 trading days to expiry
r <- 0.05  # risk-free rate
sigma <- 0.3018  # implied vol found earlier in the Python section
S0_shock <- 1  # size of the spot bump, h
# Step 1: calculate the new option price, after a positive shock f(x + h), via
# full repricing under BS closed formula
optprice_positiveshock <- black_scholes(S0 + S0_shock, K, Time, r, sigma, type = "call")
optprice_positiveshock[1] 7.650311
Show code
# Step 2: calculate the new option price, after a negative shock f(x - h), via
# full repricing under BS closed formula
optprice_negativeshock <- black_scholes(S0 - S0_shock, K, Time, r, sigma, type = "call")
optprice_negativeshock[1] 6.571866
Show code
# Step 3: calculate the delta via numerical approximation (central difference)
numerical_delta <- (optprice_positiveshock - optprice_negativeshock)/(2 * S0_shock)
numerical_delta[1] 0.5392222